[llvm] [llvm][ScheduleDAG] SUnit::biasCriticalPath() does not find the critical path consistently (PR #93001)
via llvm-commits
llvm-commits at lists.llvm.org
Wed May 22 02:33:38 PDT 2024
https://github.com/csstormq updated https://github.com/llvm/llvm-project/pull/93001
>From 3218e892302ced9563b2d03472aaf6f5f25e068d Mon Sep 17 00:00:00 2001
From: csstormq <swust_xiaoqiangxu at 163.com>
Date: Thu, 16 May 2024 16:43:18 +0800
Subject: [PATCH 1/2] [llvm][ScheduleDAG] SUnit::biasCriticalPath() does not
find the critical path consistently
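Previously the loop compared each data predecessor's depth against MaxDepth
but never updated MaxDepth after selecting a new BestI, so the last
predecessor deeper than the initial candidate was chosen rather than the
deepest one. Update MaxDepth together with BestI so that the deepest data
predecessor is the one swapped to the front of Preds.
Patch co-authored by AtariDreams (gfunni234 at gmail.com).
Fixes #38037.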
[AMDGPU] Update test results to fix build (#92982)
---
llvm/lib/CodeGen/ScheduleDAG.cpp | 4 +-
llvm/test/CodeGen/AMDGPU/fp_to_sint.ll | 395 ++--
llvm/test/CodeGen/AMDGPU/fp_to_uint.ll | 395 ++--
llvm/test/CodeGen/AMDGPU/llvm.exp.ll | 1592 ++++++++---------
llvm/test/CodeGen/AMDGPU/llvm.exp10.ll | 1592 ++++++++---------
llvm/test/CodeGen/AMDGPU/shl.ll | 216 +--
.../test/CodeGen/X86/misched-critical-path.ll | 240 +++
7 files changed, 2320 insertions(+), 2114 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/misched-critical-path.ll
diff --git a/llvm/lib/CodeGen/ScheduleDAG.cpp b/llvm/lib/CodeGen/ScheduleDAG.cpp
index de8e6f63794dc..8d9a5041fc2fe 100644
--- a/llvm/lib/CodeGen/ScheduleDAG.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -331,8 +331,10 @@ void SUnit::biasCriticalPath() {
unsigned MaxDepth = BestI->getSUnit()->getDepth();
for (SUnit::pred_iterator I = std::next(BestI), E = Preds.end(); I != E;
++I) {
- if (I->getKind() == SDep::Data && I->getSUnit()->getDepth() > MaxDepth)
+ if (I->getKind() == SDep::Data && I->getSUnit()->getDepth() > MaxDepth) {
+ MaxDepth = I->getSUnit()->getDepth();
BestI = I;
+ }
}
if (BestI != Preds.begin())
std::swap(*Preds.begin(), *BestI);
diff --git a/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll b/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll
index 64063f65e288f..04ef30bd26aa5 100644
--- a/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll
@@ -253,25 +253,25 @@ define amdgpu_kernel void @fp_to_sint_i64 (ptr addrspace(1) %out, float %in) {
; EG-NEXT: ADD_INT * T2.W, PV.W, literal.y,
; EG-NEXT: 8388608(1.175494e-38), -150(nan)
; EG-NEXT: ADD_INT T0.X, T0.W, literal.x,
-; EG-NEXT: SUB_INT T0.Y, literal.y, T0.W,
-; EG-NEXT: AND_INT T0.Z, PS, literal.z,
+; EG-NEXT: AND_INT T0.Y, PS, literal.y,
+; EG-NEXT: SUB_INT T0.Z, literal.z, T0.W,
; EG-NEXT: NOT_INT T0.W, PS,
; EG-NEXT: LSHR * T3.W, PV.W, 1,
-; EG-NEXT: -127(nan), 150(2.101948e-43)
-; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
+; EG-NEXT: -127(nan), 31(4.344025e-44)
+; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00)
; EG-NEXT: BIT_ALIGN_INT T1.X, 0.0, PS, PV.W,
-; EG-NEXT: LSHL T1.Y, T1.W, PV.Z,
-; EG-NEXT: AND_INT T0.Z, T2.W, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT: BIT_ALIGN_INT T0.W, 0.0, T1.W, PV.Y, BS:VEC_021/SCL_122
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT T1.Y, PV.Z, literal.x,
+; EG-NEXT: BIT_ALIGN_INT T0.Z, 0.0, T1.W, PV.Z,
+; EG-NEXT: LSHL T0.W, T1.W, PV.Y,
+; EG-NEXT: AND_INT * T1.W, T2.W, literal.x,
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT: CNDE_INT T0.Y, PS, PV.W, 0.0,
-; EG-NEXT: CNDE_INT T1.Z, PV.Z, PV.Y, 0.0,
-; EG-NEXT: CNDE_INT T0.W, PV.Z, PV.X, PV.Y,
+; EG-NEXT: CNDE_INT T0.Z, PV.Y, PV.Z, 0.0,
+; EG-NEXT: CNDE_INT T0.W, PS, PV.X, PV.W,
; EG-NEXT: SETGT_INT * T1.W, T0.X, literal.x,
; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
-; EG-NEXT: CNDE_INT T0.Z, PS, 0.0, PV.W,
-; EG-NEXT: CNDE_INT T0.W, PS, PV.Y, PV.Z,
+; EG-NEXT: CNDE_INT T1.Z, PS, 0.0, PV.W,
+; EG-NEXT: CNDE_INT T0.W, PS, PV.Z, PV.Y,
; EG-NEXT: ASHR * T1.W, KC0[2].Z, literal.x,
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
@@ -364,79 +364,78 @@ define amdgpu_kernel void @fp_to_sint_v2i64(ptr addrspace(1) %out, <2 x float> %
;
; EG-LABEL: fp_to_sint_v2i64:
; EG: ; %bb.0:
-; EG-NEXT: ALU 75, @4, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 74, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 4:
; EG-NEXT: MOV * T0.W, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT * T1.W, KC0[2].W, literal.x, PV.W,
-; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T0.Z, KC0[2].W, literal.x,
-; EG-NEXT: BFE_UINT T0.W, KC0[3].X, literal.y, T0.W,
-; EG-NEXT: ADD_INT * T2.W, PV.W, literal.z,
-; EG-NEXT: 8388607(1.175494e-38), 23(3.222986e-44)
+; EG-NEXT: BFE_UINT T0.Z, KC0[3].X, literal.x, PV.W,
+; EG-NEXT: BFE_UINT T0.W, KC0[2].W, literal.x, PV.W,
+; EG-NEXT: AND_INT * T1.Z, KC0[2].W, literal.y,
+; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38)
+; EG-NEXT: ADD_INT T1.W, PV.W, literal.x,
+; EG-NEXT: ADD_INT * T2.W, PV.Z, literal.x,
; EG-NEXT: -150(nan), 0(0.000000e+00)
-; EG-NEXT: SUB_INT T0.X, literal.x, PV.W,
-; EG-NEXT: SUB_INT T0.Y, literal.x, T1.W,
-; EG-NEXT: AND_INT T1.Z, PS, literal.y,
-; EG-NEXT: OR_INT T3.W, PV.Z, literal.z,
+; EG-NEXT: AND_INT T0.X, PS, literal.x,
+; EG-NEXT: AND_INT T0.Y, PV.W, literal.x,
+; EG-NEXT: OR_INT T1.Z, T1.Z, literal.y,
+; EG-NEXT: SUB_INT T3.W, literal.z, T0.W,
; EG-NEXT: AND_INT * T4.W, KC0[3].X, literal.w,
-; EG-NEXT: 150(2.101948e-43), 31(4.344025e-44)
-; EG-NEXT: 8388608(1.175494e-38), 8388607(1.175494e-38)
+; EG-NEXT: 31(4.344025e-44), 8388608(1.175494e-38)
+; EG-NEXT: 150(2.101948e-43), 8388607(1.175494e-38)
; EG-NEXT: OR_INT T1.X, PS, literal.x,
-; EG-NEXT: LSHL T1.Y, PV.W, PV.Z,
-; EG-NEXT: AND_INT T0.Z, T2.W, literal.y,
-; EG-NEXT: BIT_ALIGN_INT T4.W, 0.0, PV.W, PV.Y,
-; EG-NEXT: AND_INT * T5.W, PV.Y, literal.y,
+; EG-NEXT: AND_INT T1.Y, PV.W, literal.y,
+; EG-NEXT: BIT_ALIGN_INT T2.Z, 0.0, PV.Z, PV.W,
+; EG-NEXT: LSHL T3.W, PV.Z, PV.Y,
+; EG-NEXT: AND_INT * T4.W, T1.W, literal.y,
; EG-NEXT: 8388608(1.175494e-38), 32(4.484155e-44)
-; EG-NEXT: CNDE_INT T2.X, PS, PV.W, 0.0,
-; EG-NEXT: CNDE_INT T0.Y, PV.Z, PV.Y, 0.0,
-; EG-NEXT: ADD_INT T1.Z, T0.W, literal.x,
-; EG-NEXT: BIT_ALIGN_INT T4.W, 0.0, PV.X, T0.X,
-; EG-NEXT: AND_INT * T5.W, T0.X, literal.y,
-; EG-NEXT: -150(nan), 32(4.484155e-44)
+; EG-NEXT: CNDE_INT T0.Y, PS, PV.W, 0.0,
+; EG-NEXT: CNDE_INT T2.Z, PV.Y, PV.Z, 0.0,
+; EG-NEXT: LSHL T5.W, PV.X, T0.X,
+; EG-NEXT: AND_INT * T6.W, T2.W, literal.x,
+; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT: CNDE_INT T0.X, PS, PV.W, 0.0,
-; EG-NEXT: NOT_INT T2.Y, T2.W,
-; EG-NEXT: AND_INT T2.Z, PV.Z, literal.x,
-; EG-NEXT: NOT_INT T2.W, PV.Z,
-; EG-NEXT: LSHR * T4.W, T1.X, 1,
-; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR T3.X, T3.W, 1,
-; EG-NEXT: ADD_INT T3.Y, T0.W, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT: BIT_ALIGN_INT T3.Z, 0.0, PS, PV.W,
-; EG-NEXT: LSHL T0.W, T1.X, PV.Z,
-; EG-NEXT: AND_INT * T2.W, T1.Z, literal.y,
+; EG-NEXT: NOT_INT T1.Y, T1.W,
+; EG-NEXT: SUB_INT T3.Z, literal.x, T0.Z,
+; EG-NEXT: NOT_INT T1.W, T2.W, BS:VEC_120/SCL_212
+; EG-NEXT: LSHR * T2.W, T1.X, 1,
+; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00)
+; EG-NEXT: LSHR T2.X, T1.Z, 1,
+; EG-NEXT: ADD_INT T2.Y, T0.Z, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: BIT_ALIGN_INT T0.Z, 0.0, PS, PV.W,
+; EG-NEXT: BIT_ALIGN_INT T1.W, 0.0, T1.X, PV.Z,
+; EG-NEXT: AND_INT * T2.W, PV.Z, literal.y,
; EG-NEXT: -127(nan), 32(4.484155e-44)
; EG-NEXT: CNDE_INT T1.X, PS, PV.W, 0.0,
-; EG-NEXT: CNDE_INT T4.Y, PS, PV.Z, PV.W,
-; EG-NEXT: SETGT_INT T1.Z, PV.Y, literal.x,
-; EG-NEXT: BIT_ALIGN_INT T0.W, 0.0, PV.X, T2.Y,
-; EG-NEXT: ADD_INT * T1.W, T1.W, literal.y,
+; EG-NEXT: CNDE_INT T3.Y, T6.W, PV.Z, T5.W, BS:VEC_021/SCL_122
+; EG-NEXT: SETGT_INT T0.Z, PV.Y, literal.x,
+; EG-NEXT: BIT_ALIGN_INT T1.W, 0.0, PV.X, T1.Y,
+; EG-NEXT: ADD_INT * T0.W, T0.W, literal.y,
; EG-NEXT: 23(3.222986e-44), -127(nan)
-; EG-NEXT: CNDE_INT T3.X, T0.Z, PV.W, T1.Y,
+; EG-NEXT: CNDE_INT T2.X, T4.W, PV.W, T3.W,
; EG-NEXT: SETGT_INT T1.Y, PS, literal.x,
-; EG-NEXT: CNDE_INT T0.Z, PV.Z, 0.0, PV.Y,
-; EG-NEXT: CNDE_INT T0.W, PV.Z, T0.X, PV.X,
+; EG-NEXT: CNDE_INT T1.Z, PV.Z, 0.0, PV.Y,
+; EG-NEXT: CNDE_INT T1.W, PV.Z, PV.X, T0.X,
; EG-NEXT: ASHR * T2.W, KC0[3].X, literal.y,
; EG-NEXT: 23(3.222986e-44), 31(4.344025e-44)
; EG-NEXT: XOR_INT T0.X, PV.W, PS,
-; EG-NEXT: XOR_INT T2.Y, PV.Z, PS,
+; EG-NEXT: XOR_INT T3.Y, PV.Z, PS,
; EG-NEXT: CNDE_INT T0.Z, PV.Y, 0.0, PV.X,
-; EG-NEXT: CNDE_INT T0.W, PV.Y, T2.X, T0.Y,
+; EG-NEXT: CNDE_INT T1.W, PV.Y, T2.Z, T0.Y,
; EG-NEXT: ASHR * T3.W, KC0[2].W, literal.x,
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: XOR_INT T0.Y, PV.W, PS,
; EG-NEXT: XOR_INT T0.Z, PV.Z, PS,
-; EG-NEXT: SUB_INT T0.W, PV.Y, T2.W,
+; EG-NEXT: SUB_INT T1.W, PV.Y, T2.W,
; EG-NEXT: SUBB_UINT * T4.W, PV.X, T2.W,
; EG-NEXT: SUB_INT T1.Y, PV.W, PS,
-; EG-NEXT: SETGT_INT T1.Z, 0.0, T3.Y,
-; EG-NEXT: SUB_INT T0.W, PV.Z, T3.W,
+; EG-NEXT: SETGT_INT T1.Z, 0.0, T2.Y,
+; EG-NEXT: SUB_INT T1.W, PV.Z, T3.W,
; EG-NEXT: SUBB_UINT * T4.W, PV.Y, T3.W,
; EG-NEXT: SUB_INT T0.Z, PV.W, PS,
-; EG-NEXT: SETGT_INT T0.W, 0.0, T1.W,
+; EG-NEXT: SETGT_INT T0.W, 0.0, T0.W,
; EG-NEXT: CNDE_INT * T1.W, PV.Z, PV.Y, 0.0,
; EG-NEXT: CNDE_INT T1.Y, PV.W, PV.Z, 0.0,
; EG-NEXT: SUB_INT * T2.W, T0.X, T2.W,
@@ -567,170 +566,168 @@ define amdgpu_kernel void @fp_to_sint_v4i64(ptr addrspace(1) %out, <4 x float> %
;
; EG-LABEL: fp_to_sint_v4i64:
; EG: ; %bb.0:
-; EG-NEXT: ALU 101, @6, KC0[CB0:0-32], KC1[]
-; EG-NEXT: ALU 54, @108, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T0.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T2.X, 1
+; EG-NEXT: ALU 99, @6, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 54, @106, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T0.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 6:
; EG-NEXT: MOV * T0.W, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT T1.W, KC0[4].X, literal.x, PV.W,
-; EG-NEXT: AND_INT * T2.W, KC0[4].X, literal.y,
+; EG-NEXT: BFE_UINT T1.W, KC0[3].Z, literal.x, PV.W,
+; EG-NEXT: AND_INT * T2.W, KC0[3].Z, literal.y,
; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38)
-; EG-NEXT: OR_INT T0.Z, PS, literal.x,
-; EG-NEXT: BFE_UINT T2.W, KC0[3].Z, literal.y, T0.W,
-; EG-NEXT: ADD_INT * T3.W, PV.W, literal.z,
-; EG-NEXT: 8388608(1.175494e-38), 23(3.222986e-44)
-; EG-NEXT: -150(nan), 0(0.000000e+00)
-; EG-NEXT: ADD_INT T0.Y, PV.W, literal.x,
-; EG-NEXT: AND_INT T1.Z, PS, literal.y,
-; EG-NEXT: NOT_INT T4.W, PS,
-; EG-NEXT: LSHR * T5.W, PV.Z, 1,
-; EG-NEXT: -127(nan), 31(4.344025e-44)
+; EG-NEXT: OR_INT T2.W, PS, literal.x,
+; EG-NEXT: ADD_INT * T3.W, PV.W, literal.y,
+; EG-NEXT: 8388608(1.175494e-38), -150(nan)
; EG-NEXT: ADD_INT T0.X, T1.W, literal.x,
-; EG-NEXT: BIT_ALIGN_INT T1.Y, 0.0, PS, PV.W,
-; EG-NEXT: AND_INT T2.Z, T3.W, literal.y, BS:VEC_201
-; EG-NEXT: LSHL T3.W, T0.Z, PV.Z,
-; EG-NEXT: SUB_INT * T1.W, literal.z, T1.W,
-; EG-NEXT: -127(nan), 32(4.484155e-44)
-; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.X, PS, literal.x,
-; EG-NEXT: BIT_ALIGN_INT T2.Y, 0.0, T0.Z, PS,
-; EG-NEXT: AND_INT T0.Z, KC0[3].Z, literal.y,
-; EG-NEXT: CNDE_INT T1.W, PV.Z, PV.Y, PV.W,
-; EG-NEXT: SETGT_INT * T4.W, PV.X, literal.z,
+; EG-NEXT: BFE_UINT T0.Y, KC0[4].X, literal.y, T0.W,
+; EG-NEXT: AND_INT T0.Z, PS, literal.z,
+; EG-NEXT: NOT_INT T4.W, PS,
+; EG-NEXT: LSHR * T5.W, PV.W, 1,
+; EG-NEXT: -127(nan), 23(3.222986e-44)
+; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
+; EG-NEXT: BIT_ALIGN_INT T1.X, 0.0, PS, PV.W,
+; EG-NEXT: AND_INT T1.Y, T3.W, literal.x,
+; EG-NEXT: LSHL T0.Z, T2.W, PV.Z, BS:VEC_120/SCL_212
+; EG-NEXT: AND_INT T3.W, KC0[4].X, literal.y,
+; EG-NEXT: ADD_INT * T4.W, PV.Y, literal.z,
; EG-NEXT: 32(4.484155e-44), 8388607(1.175494e-38)
+; EG-NEXT: -150(nan), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.Y, PS, literal.x,
+; EG-NEXT: OR_INT T1.Z, PV.W, literal.y,
+; EG-NEXT: CNDE_INT T3.W, PV.Y, PV.X, PV.Z,
+; EG-NEXT: SETGT_INT * T5.W, T0.X, literal.z,
+; EG-NEXT: 31(4.344025e-44), 8388608(1.175494e-38)
; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
-; EG-NEXT: CNDE_INT T2.X, PS, 0.0, PV.W,
-; EG-NEXT: OR_INT T1.Y, PV.Z, literal.x,
-; EG-NEXT: ADD_INT T0.Z, T2.W, literal.y,
-; EG-NEXT: CNDE_INT T1.W, PV.X, PV.Y, 0.0,
-; EG-NEXT: CNDE_INT * T3.W, T2.Z, T3.W, 0.0,
-; EG-NEXT: 8388608(1.175494e-38), -150(nan)
-; EG-NEXT: CNDE_INT T1.X, T4.W, PV.W, PS,
-; EG-NEXT: ASHR T2.Y, KC0[4].X, literal.x,
-; EG-NEXT: AND_INT T1.Z, PV.Z, literal.x,
-; EG-NEXT: NOT_INT T1.W, PV.Z,
-; EG-NEXT: LSHR * T3.W, PV.Y, 1,
-; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT: BIT_ALIGN_INT T3.X, 0.0, PS, PV.W,
-; EG-NEXT: LSHL T3.Y, T1.Y, PV.Z,
-; EG-NEXT: XOR_INT T1.Z, PV.X, PV.Y,
-; EG-NEXT: XOR_INT T1.W, T2.X, PV.Y,
-; EG-NEXT: SUB_INT * T2.W, literal.x, T2.W,
-; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.X, T0.Z, literal.x,
-; EG-NEXT: AND_INT T4.Y, PS, literal.x,
-; EG-NEXT: BIT_ALIGN_INT T0.Z, 0.0, T1.Y, PS, BS:VEC_021/SCL_122
-; EG-NEXT: SUB_INT T1.W, PV.W, T2.Y,
-; EG-NEXT: SUBB_UINT * T2.W, PV.Z, T2.Y,
-; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
-; EG-NEXT: SUB_INT T2.X, PV.W, PS,
-; EG-NEXT: CNDE_INT T1.Y, PV.Y, PV.Z, 0.0,
-; EG-NEXT: CNDE_INT T0.Z, PV.X, T3.Y, 0.0,
-; EG-NEXT: CNDE_INT T1.W, PV.X, T3.X, T3.Y, BS:VEC_021/SCL_122
-; EG-NEXT: SETGT_INT * T2.W, T0.Y, literal.x,
+; EG-NEXT: CNDE_INT T3.Y, PS, 0.0, PV.W,
+; EG-NEXT: SUB_INT T2.Z, literal.x, T1.W,
+; EG-NEXT: LSHL T1.W, PV.Z, PV.Y,
+; EG-NEXT: AND_INT * T3.W, T4.W, literal.y,
+; EG-NEXT: 150(2.101948e-43), 32(4.484155e-44)
+; EG-NEXT: CNDE_INT T1.X, PS, PV.W, 0.0,
+; EG-NEXT: AND_INT T2.Y, PV.Z, literal.x,
+; EG-NEXT: SUB_INT T3.Z, literal.y, T0.Y,
+; EG-NEXT: NOT_INT T4.W, T4.W,
+; EG-NEXT: LSHR * T6.W, T1.Z, 1,
+; EG-NEXT: 32(4.484155e-44), 150(2.101948e-43)
+; EG-NEXT: BIT_ALIGN_INT T2.X, 0.0, T2.W, T2.Z,
+; EG-NEXT: ADD_INT T0.Y, T0.Y, literal.x,
+; EG-NEXT: BIT_ALIGN_INT T2.Z, 0.0, PS, PV.W,
+; EG-NEXT: BIT_ALIGN_INT T2.W, 0.0, T1.Z, PV.Z,
+; EG-NEXT: AND_INT * T4.W, PV.Z, literal.y,
+; EG-NEXT: -127(nan), 32(4.484155e-44)
+; EG-NEXT: CNDE_INT T3.X, PS, PV.W, 0.0,
+; EG-NEXT: CNDE_INT T4.Y, T3.W, PV.Z, T1.W,
+; EG-NEXT: SETGT_INT T1.Z, PV.Y, literal.x,
+; EG-NEXT: CNDE_INT T1.W, T1.Y, T0.Z, 0.0,
+; EG-NEXT: CNDE_INT * T2.W, T2.Y, PV.X, 0.0,
; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT T1.X, KC0[3].W, literal.x, T0.W,
-; EG-NEXT: AND_INT T3.Y, KC0[3].W, literal.y,
-; EG-NEXT: CNDE_INT T2.Z, PS, 0.0, PV.W,
-; EG-NEXT: CNDE_INT T1.W, PS, PV.Y, PV.Z,
-; EG-NEXT: ASHR * T2.W, KC0[3].Z, literal.z,
-; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38)
+; EG-NEXT: CNDE_INT T2.X, T5.W, PS, PV.W,
+; EG-NEXT: ASHR T1.Y, KC0[3].Z, literal.x,
+; EG-NEXT: CNDE_INT T0.Z, PV.Z, 0.0, PV.Y,
+; EG-NEXT: CNDE_INT T1.W, PV.Z, PV.X, T1.X,
+; EG-NEXT: ASHR * T2.W, KC0[4].X, literal.x,
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT T3.X, KC0[3].Y, literal.x, T0.W,
-; EG-NEXT: XOR_INT T1.Y, PV.W, PS,
+; EG-NEXT: XOR_INT T2.Y, PV.W, PS,
; EG-NEXT: XOR_INT T0.Z, PV.Z, PS,
-; EG-NEXT: OR_INT T0.W, PV.Y, literal.y,
-; EG-NEXT: SUB_INT * T1.W, literal.z, PV.X,
-; EG-NEXT: 23(3.222986e-44), 8388608(1.175494e-38)
+; EG-NEXT: XOR_INT T1.W, PV.X, PV.Y,
+; EG-NEXT: XOR_INT * T3.W, T3.Y, PV.Y,
+; EG-NEXT: SUB_INT T3.Y, PS, T1.Y,
+; EG-NEXT: SUBB_UINT T1.Z, PV.W, T1.Y,
+; EG-NEXT: SUB_INT T3.W, PV.Z, T2.W,
+; EG-NEXT: SUBB_UINT * T4.W, PV.Y, T2.W,
+; EG-NEXT: SUB_INT T4.Y, PV.W, PS,
+; EG-NEXT: SUB_INT T0.Z, PV.Y, PV.Z,
+; EG-NEXT: BFE_UINT T3.W, KC0[3].Y, literal.x, T0.W,
+; EG-NEXT: AND_INT * T4.W, KC0[3].Y, literal.y,
+; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38)
+; EG-NEXT: SETGT_INT T0.X, 0.0, T0.X,
+; EG-NEXT: ADD_INT T3.Y, PV.W, literal.x,
+; EG-NEXT: OR_INT T1.Z, PS, literal.y,
+; EG-NEXT: BFE_UINT T0.W, KC0[3].W, literal.z, T0.W,
+; EG-NEXT: ADD_INT * T4.W, PV.W, literal.w,
+; EG-NEXT: -127(nan), 8388608(1.175494e-38)
+; EG-NEXT: 23(3.222986e-44), -150(nan)
+; EG-NEXT: AND_INT T1.X, KC0[3].W, literal.x,
+; EG-NEXT: ADD_INT T5.Y, PV.W, literal.y,
+; EG-NEXT: SUB_INT T2.Z, literal.z, T3.W,
+; EG-NEXT: NOT_INT T3.W, PS,
+; EG-NEXT: LSHR * T5.W, PV.Z, 1,
+; EG-NEXT: 8388607(1.175494e-38), -150(nan)
; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00)
-; EG-NEXT: AND_INT T4.X, KC0[3].Y, literal.x,
-; EG-NEXT: AND_INT T3.Y, PS, literal.y,
-; EG-NEXT: BIT_ALIGN_INT T2.Z, 0.0, PV.W, PS,
-; EG-NEXT: SUB_INT T1.W, PV.Z, T2.W,
-; EG-NEXT: SUBB_UINT * T3.W, PV.Y, T2.W,
-; EG-NEXT: 8388607(1.175494e-38), 32(4.484155e-44)
-; EG-NEXT: SUB_INT T5.X, PV.W, PS,
-; EG-NEXT: SETGT_INT T0.Y, 0.0, T0.Y,
-; EG-NEXT: CNDE_INT T0.Z, PV.Y, PV.Z, 0.0,
-; EG-NEXT: OR_INT T1.W, PV.X, literal.x,
-; EG-NEXT: ADD_INT * T3.W, T3.X, literal.y,
-; EG-NEXT: 8388608(1.175494e-38), -150(nan)
-; EG-NEXT: ADD_INT T4.X, T3.X, literal.x,
-; EG-NEXT: SUB_INT T3.Y, literal.y, T3.X,
-; EG-NEXT: AND_INT T2.Z, PS, literal.z,
-; EG-NEXT: NOT_INT T4.W, PS,
-; EG-NEXT: LSHR * T5.W, PV.W, 1,
-; EG-NEXT: -127(nan), 150(2.101948e-43)
-; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT: BIT_ALIGN_INT T3.X, 0.0, PS, PV.W,
-; EG-NEXT: LSHL T4.Y, T1.W, PV.Z,
-; EG-NEXT: AND_INT T2.Z, T3.W, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT: BIT_ALIGN_INT T1.W, 0.0, T1.W, PV.Y, BS:VEC_021/SCL_122
-; EG-NEXT: AND_INT * T3.W, PV.Y, literal.x,
+; EG-NEXT: BIT_ALIGN_INT T2.X, 0.0, PS, PV.W,
+; EG-NEXT: AND_INT T6.Y, PV.Z, literal.x,
+; EG-NEXT: AND_INT T3.Z, PV.Y, literal.y,
+; EG-NEXT: OR_INT T3.W, PV.X, literal.z,
+; EG-NEXT: AND_INT * T5.W, T4.W, literal.y,
+; EG-NEXT: 32(4.484155e-44), 31(4.344025e-44)
+; EG-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
+; EG-NEXT: BIT_ALIGN_INT T1.X, 0.0, T1.Z, T2.Z,
+; EG-NEXT: LSHL T7.Y, T1.Z, PS,
+; EG-NEXT: AND_INT T1.Z, T4.W, literal.x,
+; EG-NEXT: LSHL T4.W, PV.W, PV.Z,
+; EG-NEXT: AND_INT * T5.W, T5.Y, literal.x,
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
-; EG-NEXT: ADD_INT T6.X, T1.X, literal.x,
-; EG-NEXT: CNDE_INT T3.Y, PS, PV.W, 0.0,
-; EG-NEXT: CNDE_INT * T3.Z, PV.Z, PV.Y, 0.0,
-; EG-NEXT: -150(nan), 0(0.000000e+00)
-; EG-NEXT: ALU clause starting at 108:
-; EG-NEXT: CNDE_INT T1.W, T2.Z, T3.X, T4.Y,
-; EG-NEXT: SETGT_INT * T3.W, T4.X, literal.x,
+; EG-NEXT: CNDE_INT T3.X, PS, PV.W, 0.0,
+; EG-NEXT: CNDE_INT T8.Y, PV.Z, PV.Y, 0.0,
+; EG-NEXT: CNDE_INT * T2.Z, T6.Y, PV.X, 0.0,
+; EG-NEXT: ALU clause starting at 106:
+; EG-NEXT: CNDE_INT T6.W, T1.Z, T2.X, T7.Y, BS:VEC_021/SCL_122
+; EG-NEXT: SETGT_INT * T7.W, T3.Y, literal.x,
; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
-; EG-NEXT: CNDE_INT T3.X, PS, 0.0, PV.W,
-; EG-NEXT: CNDE_INT T3.Y, PS, T3.Y, T3.Z,
-; EG-NEXT: AND_INT T2.Z, T6.X, literal.x,
-; EG-NEXT: NOT_INT T1.W, T6.X,
-; EG-NEXT: LSHR * T3.W, T0.W, 1,
-; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT: ASHR T7.X, KC0[3].Y, literal.x,
-; EG-NEXT: ADD_INT T4.Y, T1.X, literal.y,
-; EG-NEXT: BIT_ALIGN_INT T3.Z, 0.0, PS, PV.W,
-; EG-NEXT: LSHL T0.W, T0.W, PV.Z,
-; EG-NEXT: AND_INT * T1.W, T6.X, literal.z,
+; EG-NEXT: CNDE_INT T1.X, PS, 0.0, PV.W,
+; EG-NEXT: CNDE_INT T6.Y, PS, T2.Z, T8.Y,
+; EG-NEXT: SUB_INT T1.Z, literal.x, T0.W,
+; EG-NEXT: NOT_INT T6.W, T5.Y,
+; EG-NEXT: LSHR * T7.W, T3.W, 1,
+; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00)
+; EG-NEXT: ASHR T2.X, KC0[3].Y, literal.x,
+; EG-NEXT: ADD_INT T5.Y, T0.W, literal.y,
+; EG-NEXT: BIT_ALIGN_INT T2.Z, 0.0, PS, PV.W,
+; EG-NEXT: BIT_ALIGN_INT T0.W, 0.0, T3.W, PV.Z,
+; EG-NEXT: AND_INT * T3.W, PV.Z, literal.z,
; EG-NEXT: 31(4.344025e-44), -127(nan)
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
-; EG-NEXT: CNDE_INT T1.X, PS, PV.W, 0.0,
-; EG-NEXT: CNDE_INT T5.Y, PS, PV.Z, PV.W,
-; EG-NEXT: SETGT_INT T2.Z, PV.Y, literal.x,
-; EG-NEXT: XOR_INT T0.W, T3.Y, PV.X,
-; EG-NEXT: XOR_INT * T1.W, T3.X, PV.X,
+; EG-NEXT: CNDE_INT T4.X, PS, PV.W, 0.0,
+; EG-NEXT: CNDE_INT T7.Y, T5.W, PV.Z, T4.W,
+; EG-NEXT: SETGT_INT T1.Z, PV.Y, literal.x,
+; EG-NEXT: XOR_INT T0.W, T6.Y, PV.X,
+; EG-NEXT: XOR_INT * T3.W, T1.X, PV.X,
; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
-; EG-NEXT: SUB_INT T3.X, PS, T7.X,
-; EG-NEXT: SUBB_UINT T3.Y, PV.W, T7.X,
-; EG-NEXT: CNDE_INT T3.Z, PV.Z, 0.0, PV.Y,
-; EG-NEXT: CNDE_INT T1.W, PV.Z, T0.Z, PV.X,
-; EG-NEXT: ASHR * T3.W, KC0[3].W, literal.x,
+; EG-NEXT: SUB_INT T1.X, PS, T2.X,
+; EG-NEXT: SUBB_UINT T6.Y, PV.W, T2.X,
+; EG-NEXT: CNDE_INT T2.Z, PV.Z, 0.0, PV.Y,
+; EG-NEXT: CNDE_INT T3.W, PV.Z, PV.X, T3.X,
+; EG-NEXT: ASHR * T4.W, KC0[3].W, literal.x,
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT: XOR_INT T1.X, PV.W, PS,
-; EG-NEXT: XOR_INT T5.Y, PV.Z, PS,
-; EG-NEXT: SUB_INT T0.Z, PV.X, PV.Y,
-; EG-NEXT: SETGT_INT T1.W, 0.0, T4.X, BS:VEC_021/SCL_122
-; EG-NEXT: CNDE_INT * T6.W, T0.Y, T5.X, 0.0,
-; EG-NEXT: SETGT_INT T0.X, 0.0, T0.X,
+; EG-NEXT: XOR_INT T3.X, PV.W, PS,
+; EG-NEXT: XOR_INT T7.Y, PV.Z, PS,
+; EG-NEXT: SUB_INT T1.Z, PV.X, PV.Y,
+; EG-NEXT: SETGT_INT T3.W, 0.0, T3.Y,
+; EG-NEXT: CNDE_INT * T6.W, T0.X, T0.Z, 0.0,
+; EG-NEXT: SETGT_INT T1.X, 0.0, T0.Y,
; EG-NEXT: CNDE_INT T6.Y, PV.W, PV.Z, 0.0,
-; EG-NEXT: SUB_INT T0.Z, T1.Y, T2.W, BS:VEC_021/SCL_122
-; EG-NEXT: SUB_INT T2.W, PV.Y, T3.W,
-; EG-NEXT: SUBB_UINT * T4.W, PV.X, T3.W,
-; EG-NEXT: SUB_INT T3.X, PV.W, PS,
-; EG-NEXT: SETGT_INT T1.Y, 0.0, T4.Y,
-; EG-NEXT: CNDE_INT T6.Z, T0.Y, PV.Z, 0.0,
-; EG-NEXT: SUB_INT T0.W, T0.W, T7.X, BS:VEC_021/SCL_122
-; EG-NEXT: CNDE_INT * T4.W, PV.X, T2.X, 0.0,
-; EG-NEXT: CNDE_INT T6.X, T1.W, PV.W, 0.0,
-; EG-NEXT: CNDE_INT T4.Y, PV.Y, PV.X, 0.0,
-; EG-NEXT: SUB_INT T0.W, T1.Z, T2.Y,
-; EG-NEXT: LSHR * T2.X, KC0[2].Y, literal.x,
+; EG-NEXT: SUB_INT T0.Z, T1.W, T1.Y, BS:VEC_021/SCL_122
+; EG-NEXT: SUB_INT T1.W, PV.Y, T4.W,
+; EG-NEXT: SUBB_UINT * T5.W, PV.X, T4.W,
+; EG-NEXT: SUB_INT T4.X, PV.W, PS,
+; EG-NEXT: SETGT_INT T0.Y, 0.0, T5.Y, BS:VEC_021/SCL_122
+; EG-NEXT: CNDE_INT T6.Z, T0.X, PV.Z, 0.0,
+; EG-NEXT: SUB_INT T0.W, T0.W, T2.X,
+; EG-NEXT: CNDE_INT * T1.W, PV.X, T4.Y, 0.0,
+; EG-NEXT: CNDE_INT T6.X, T3.W, PV.W, 0.0,
+; EG-NEXT: CNDE_INT T1.Y, PV.Y, PV.X, 0.0,
+; EG-NEXT: SUB_INT T0.W, T2.Y, T2.W,
+; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT: CNDE_INT T4.Z, T0.X, PV.W, 0.0,
-; EG-NEXT: SUB_INT * T0.W, T1.X, T3.W, BS:VEC_120/SCL_212
-; EG-NEXT: CNDE_INT T4.X, T1.Y, PV.W, 0.0,
+; EG-NEXT: CNDE_INT T1.Z, T1.X, PV.W, 0.0,
+; EG-NEXT: SUB_INT * T0.W, T3.X, T4.W, BS:VEC_120/SCL_212
+; EG-NEXT: CNDE_INT T1.X, T0.Y, PV.W, 0.0,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR * T0.X, PV.W, literal.x,
+; EG-NEXT: LSHR * T2.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%conv = fptosi <4 x float> %x to <4 x i64>
store <4 x i64> %conv, ptr addrspace(1) %out
diff --git a/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll b/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll
index 5170f9c76db23..5abf82aa1aab5 100644
--- a/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll
@@ -200,25 +200,25 @@ define amdgpu_kernel void @fp_to_uint_f32_to_i64(ptr addrspace(1) %out, float %x
; EG-NEXT: ADD_INT * T2.W, PV.W, literal.y,
; EG-NEXT: 8388608(1.175494e-38), -150(nan)
; EG-NEXT: ADD_INT T0.X, T0.W, literal.x,
-; EG-NEXT: SUB_INT T0.Y, literal.y, T0.W,
-; EG-NEXT: AND_INT T0.Z, PS, literal.z,
+; EG-NEXT: AND_INT T0.Y, PS, literal.y,
+; EG-NEXT: SUB_INT T0.Z, literal.z, T0.W,
; EG-NEXT: NOT_INT T0.W, PS,
; EG-NEXT: LSHR * T3.W, PV.W, 1,
-; EG-NEXT: -127(nan), 150(2.101948e-43)
-; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
+; EG-NEXT: -127(nan), 31(4.344025e-44)
+; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00)
; EG-NEXT: BIT_ALIGN_INT T1.X, 0.0, PS, PV.W,
-; EG-NEXT: LSHL T1.Y, T1.W, PV.Z,
-; EG-NEXT: AND_INT T0.Z, T2.W, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT: BIT_ALIGN_INT T0.W, 0.0, T1.W, PV.Y, BS:VEC_021/SCL_122
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT T1.Y, PV.Z, literal.x,
+; EG-NEXT: BIT_ALIGN_INT T0.Z, 0.0, T1.W, PV.Z,
+; EG-NEXT: LSHL T0.W, T1.W, PV.Y,
+; EG-NEXT: AND_INT * T1.W, T2.W, literal.x,
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT: CNDE_INT T0.Y, PS, PV.W, 0.0,
-; EG-NEXT: CNDE_INT T1.Z, PV.Z, PV.Y, 0.0,
-; EG-NEXT: CNDE_INT T0.W, PV.Z, PV.X, PV.Y,
+; EG-NEXT: CNDE_INT T0.Z, PV.Y, PV.Z, 0.0,
+; EG-NEXT: CNDE_INT T0.W, PS, PV.X, PV.W,
; EG-NEXT: SETGT_INT * T1.W, T0.X, literal.x,
; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
-; EG-NEXT: CNDE_INT T0.Z, PS, 0.0, PV.W,
-; EG-NEXT: CNDE_INT T0.W, PS, PV.Y, PV.Z,
+; EG-NEXT: CNDE_INT T1.Z, PS, 0.0, PV.W,
+; EG-NEXT: CNDE_INT T0.W, PS, PV.Z, PV.Y,
; EG-NEXT: ASHR * T1.W, KC0[2].Z, literal.x,
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
@@ -288,79 +288,78 @@ define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i64(ptr addrspace(1) %out, <2 x
;
; EG-LABEL: fp_to_uint_v2f32_to_v2i64:
; EG: ; %bb.0:
-; EG-NEXT: ALU 75, @4, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 74, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 4:
; EG-NEXT: MOV * T0.W, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT * T1.W, KC0[2].W, literal.x, PV.W,
-; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T0.Z, KC0[2].W, literal.x,
-; EG-NEXT: BFE_UINT T0.W, KC0[3].X, literal.y, T0.W,
-; EG-NEXT: ADD_INT * T2.W, PV.W, literal.z,
-; EG-NEXT: 8388607(1.175494e-38), 23(3.222986e-44)
+; EG-NEXT: BFE_UINT T0.Z, KC0[3].X, literal.x, PV.W,
+; EG-NEXT: BFE_UINT T0.W, KC0[2].W, literal.x, PV.W,
+; EG-NEXT: AND_INT * T1.Z, KC0[2].W, literal.y,
+; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38)
+; EG-NEXT: ADD_INT T1.W, PV.W, literal.x,
+; EG-NEXT: ADD_INT * T2.W, PV.Z, literal.x,
; EG-NEXT: -150(nan), 0(0.000000e+00)
-; EG-NEXT: SUB_INT T0.X, literal.x, PV.W,
-; EG-NEXT: SUB_INT T0.Y, literal.x, T1.W,
-; EG-NEXT: AND_INT T1.Z, PS, literal.y,
-; EG-NEXT: OR_INT T3.W, PV.Z, literal.z,
+; EG-NEXT: AND_INT T0.X, PS, literal.x,
+; EG-NEXT: AND_INT T0.Y, PV.W, literal.x,
+; EG-NEXT: OR_INT T1.Z, T1.Z, literal.y,
+; EG-NEXT: SUB_INT T3.W, literal.z, T0.W,
; EG-NEXT: AND_INT * T4.W, KC0[3].X, literal.w,
-; EG-NEXT: 150(2.101948e-43), 31(4.344025e-44)
-; EG-NEXT: 8388608(1.175494e-38), 8388607(1.175494e-38)
+; EG-NEXT: 31(4.344025e-44), 8388608(1.175494e-38)
+; EG-NEXT: 150(2.101948e-43), 8388607(1.175494e-38)
; EG-NEXT: OR_INT T1.X, PS, literal.x,
-; EG-NEXT: LSHL T1.Y, PV.W, PV.Z,
-; EG-NEXT: AND_INT T0.Z, T2.W, literal.y,
-; EG-NEXT: BIT_ALIGN_INT T4.W, 0.0, PV.W, PV.Y,
-; EG-NEXT: AND_INT * T5.W, PV.Y, literal.y,
+; EG-NEXT: AND_INT T1.Y, PV.W, literal.y,
+; EG-NEXT: BIT_ALIGN_INT T2.Z, 0.0, PV.Z, PV.W,
+; EG-NEXT: LSHL T3.W, PV.Z, PV.Y,
+; EG-NEXT: AND_INT * T4.W, T1.W, literal.y,
; EG-NEXT: 8388608(1.175494e-38), 32(4.484155e-44)
-; EG-NEXT: CNDE_INT T2.X, PS, PV.W, 0.0,
-; EG-NEXT: CNDE_INT T0.Y, PV.Z, PV.Y, 0.0,
-; EG-NEXT: ADD_INT T1.Z, T0.W, literal.x,
-; EG-NEXT: BIT_ALIGN_INT T4.W, 0.0, PV.X, T0.X,
-; EG-NEXT: AND_INT * T5.W, T0.X, literal.y,
-; EG-NEXT: -150(nan), 32(4.484155e-44)
+; EG-NEXT: CNDE_INT T0.Y, PS, PV.W, 0.0,
+; EG-NEXT: CNDE_INT T2.Z, PV.Y, PV.Z, 0.0,
+; EG-NEXT: LSHL T5.W, PV.X, T0.X,
+; EG-NEXT: AND_INT * T6.W, T2.W, literal.x,
+; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT: CNDE_INT T0.X, PS, PV.W, 0.0,
-; EG-NEXT: NOT_INT T2.Y, T2.W,
-; EG-NEXT: AND_INT T2.Z, PV.Z, literal.x,
-; EG-NEXT: NOT_INT T2.W, PV.Z,
-; EG-NEXT: LSHR * T4.W, T1.X, 1,
-; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR T3.X, T3.W, 1,
-; EG-NEXT: ADD_INT T3.Y, T0.W, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT: BIT_ALIGN_INT T3.Z, 0.0, PS, PV.W,
-; EG-NEXT: LSHL T0.W, T1.X, PV.Z,
-; EG-NEXT: AND_INT * T2.W, T1.Z, literal.y,
+; EG-NEXT: NOT_INT T1.Y, T1.W,
+; EG-NEXT: SUB_INT T3.Z, literal.x, T0.Z,
+; EG-NEXT: NOT_INT T1.W, T2.W, BS:VEC_120/SCL_212
+; EG-NEXT: LSHR * T2.W, T1.X, 1,
+; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00)
+; EG-NEXT: LSHR T2.X, T1.Z, 1,
+; EG-NEXT: ADD_INT T2.Y, T0.Z, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: BIT_ALIGN_INT T0.Z, 0.0, PS, PV.W,
+; EG-NEXT: BIT_ALIGN_INT T1.W, 0.0, T1.X, PV.Z,
+; EG-NEXT: AND_INT * T2.W, PV.Z, literal.y,
; EG-NEXT: -127(nan), 32(4.484155e-44)
; EG-NEXT: CNDE_INT T1.X, PS, PV.W, 0.0,
-; EG-NEXT: CNDE_INT T4.Y, PS, PV.Z, PV.W,
-; EG-NEXT: SETGT_INT T1.Z, PV.Y, literal.x,
-; EG-NEXT: BIT_ALIGN_INT T0.W, 0.0, PV.X, T2.Y,
-; EG-NEXT: ADD_INT * T1.W, T1.W, literal.y,
+; EG-NEXT: CNDE_INT T3.Y, T6.W, PV.Z, T5.W, BS:VEC_021/SCL_122
+; EG-NEXT: SETGT_INT T0.Z, PV.Y, literal.x,
+; EG-NEXT: BIT_ALIGN_INT T1.W, 0.0, PV.X, T1.Y,
+; EG-NEXT: ADD_INT * T0.W, T0.W, literal.y,
; EG-NEXT: 23(3.222986e-44), -127(nan)
-; EG-NEXT: CNDE_INT T3.X, T0.Z, PV.W, T1.Y,
+; EG-NEXT: CNDE_INT T2.X, T4.W, PV.W, T3.W,
; EG-NEXT: SETGT_INT T1.Y, PS, literal.x,
-; EG-NEXT: CNDE_INT T0.Z, PV.Z, 0.0, PV.Y,
-; EG-NEXT: CNDE_INT T0.W, PV.Z, T0.X, PV.X,
+; EG-NEXT: CNDE_INT T1.Z, PV.Z, 0.0, PV.Y,
+; EG-NEXT: CNDE_INT T1.W, PV.Z, PV.X, T0.X,
; EG-NEXT: ASHR * T2.W, KC0[3].X, literal.y,
; EG-NEXT: 23(3.222986e-44), 31(4.344025e-44)
; EG-NEXT: XOR_INT T0.X, PV.W, PS,
-; EG-NEXT: XOR_INT T2.Y, PV.Z, PS,
+; EG-NEXT: XOR_INT T3.Y, PV.Z, PS,
; EG-NEXT: CNDE_INT T0.Z, PV.Y, 0.0, PV.X,
-; EG-NEXT: CNDE_INT T0.W, PV.Y, T2.X, T0.Y,
+; EG-NEXT: CNDE_INT T1.W, PV.Y, T2.Z, T0.Y,
; EG-NEXT: ASHR * T3.W, KC0[2].W, literal.x,
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: XOR_INT T0.Y, PV.W, PS,
; EG-NEXT: XOR_INT T0.Z, PV.Z, PS,
-; EG-NEXT: SUB_INT T0.W, PV.Y, T2.W,
+; EG-NEXT: SUB_INT T1.W, PV.Y, T2.W,
; EG-NEXT: SUBB_UINT * T4.W, PV.X, T2.W,
; EG-NEXT: SUB_INT T1.Y, PV.W, PS,
-; EG-NEXT: SETGT_INT T1.Z, 0.0, T3.Y,
-; EG-NEXT: SUB_INT T0.W, PV.Z, T3.W,
+; EG-NEXT: SETGT_INT T1.Z, 0.0, T2.Y,
+; EG-NEXT: SUB_INT T1.W, PV.Z, T3.W,
; EG-NEXT: SUBB_UINT * T4.W, PV.Y, T3.W,
; EG-NEXT: SUB_INT T0.Z, PV.W, PS,
-; EG-NEXT: SETGT_INT T0.W, 0.0, T1.W,
+; EG-NEXT: SETGT_INT T0.W, 0.0, T0.W,
; EG-NEXT: CNDE_INT * T1.W, PV.Z, PV.Y, 0.0,
; EG-NEXT: CNDE_INT T1.Y, PV.W, PV.Z, 0.0,
; EG-NEXT: SUB_INT * T2.W, T0.X, T2.W,
@@ -449,170 +448,168 @@ define amdgpu_kernel void @fp_to_uint_v4f32_to_v4i64(ptr addrspace(1) %out, <4 x
;
; EG-LABEL: fp_to_uint_v4f32_to_v4i64:
; EG: ; %bb.0:
-; EG-NEXT: ALU 101, @6, KC0[CB0:0-32], KC1[]
-; EG-NEXT: ALU 54, @108, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T0.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T2.X, 1
+; EG-NEXT: ALU 99, @6, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 54, @106, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T0.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 6:
; EG-NEXT: MOV * T0.W, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT T1.W, KC0[4].X, literal.x, PV.W,
-; EG-NEXT: AND_INT * T2.W, KC0[4].X, literal.y,
+; EG-NEXT: BFE_UINT T1.W, KC0[3].Z, literal.x, PV.W,
+; EG-NEXT: AND_INT * T2.W, KC0[3].Z, literal.y,
; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38)
-; EG-NEXT: OR_INT T0.Z, PS, literal.x,
-; EG-NEXT: BFE_UINT T2.W, KC0[3].Z, literal.y, T0.W,
-; EG-NEXT: ADD_INT * T3.W, PV.W, literal.z,
-; EG-NEXT: 8388608(1.175494e-38), 23(3.222986e-44)
-; EG-NEXT: -150(nan), 0(0.000000e+00)
-; EG-NEXT: ADD_INT T0.Y, PV.W, literal.x,
-; EG-NEXT: AND_INT T1.Z, PS, literal.y,
-; EG-NEXT: NOT_INT T4.W, PS,
-; EG-NEXT: LSHR * T5.W, PV.Z, 1,
-; EG-NEXT: -127(nan), 31(4.344025e-44)
+; EG-NEXT: OR_INT T2.W, PS, literal.x,
+; EG-NEXT: ADD_INT * T3.W, PV.W, literal.y,
+; EG-NEXT: 8388608(1.175494e-38), -150(nan)
; EG-NEXT: ADD_INT T0.X, T1.W, literal.x,
-; EG-NEXT: BIT_ALIGN_INT T1.Y, 0.0, PS, PV.W,
-; EG-NEXT: AND_INT T2.Z, T3.W, literal.y, BS:VEC_201
-; EG-NEXT: LSHL T3.W, T0.Z, PV.Z,
-; EG-NEXT: SUB_INT * T1.W, literal.z, T1.W,
-; EG-NEXT: -127(nan), 32(4.484155e-44)
-; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.X, PS, literal.x,
-; EG-NEXT: BIT_ALIGN_INT T2.Y, 0.0, T0.Z, PS,
-; EG-NEXT: AND_INT T0.Z, KC0[3].Z, literal.y,
-; EG-NEXT: CNDE_INT T1.W, PV.Z, PV.Y, PV.W,
-; EG-NEXT: SETGT_INT * T4.W, PV.X, literal.z,
+; EG-NEXT: BFE_UINT T0.Y, KC0[4].X, literal.y, T0.W,
+; EG-NEXT: AND_INT T0.Z, PS, literal.z,
+; EG-NEXT: NOT_INT T4.W, PS,
+; EG-NEXT: LSHR * T5.W, PV.W, 1,
+; EG-NEXT: -127(nan), 23(3.222986e-44)
+; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
+; EG-NEXT: BIT_ALIGN_INT T1.X, 0.0, PS, PV.W,
+; EG-NEXT: AND_INT T1.Y, T3.W, literal.x,
+; EG-NEXT: LSHL T0.Z, T2.W, PV.Z, BS:VEC_120/SCL_212
+; EG-NEXT: AND_INT T3.W, KC0[4].X, literal.y,
+; EG-NEXT: ADD_INT * T4.W, PV.Y, literal.z,
; EG-NEXT: 32(4.484155e-44), 8388607(1.175494e-38)
+; EG-NEXT: -150(nan), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.Y, PS, literal.x,
+; EG-NEXT: OR_INT T1.Z, PV.W, literal.y,
+; EG-NEXT: CNDE_INT T3.W, PV.Y, PV.X, PV.Z,
+; EG-NEXT: SETGT_INT * T5.W, T0.X, literal.z,
+; EG-NEXT: 31(4.344025e-44), 8388608(1.175494e-38)
; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
-; EG-NEXT: CNDE_INT T2.X, PS, 0.0, PV.W,
-; EG-NEXT: OR_INT T1.Y, PV.Z, literal.x,
-; EG-NEXT: ADD_INT T0.Z, T2.W, literal.y,
-; EG-NEXT: CNDE_INT T1.W, PV.X, PV.Y, 0.0,
-; EG-NEXT: CNDE_INT * T3.W, T2.Z, T3.W, 0.0,
-; EG-NEXT: 8388608(1.175494e-38), -150(nan)
-; EG-NEXT: CNDE_INT T1.X, T4.W, PV.W, PS,
-; EG-NEXT: ASHR T2.Y, KC0[4].X, literal.x,
-; EG-NEXT: AND_INT T1.Z, PV.Z, literal.x,
-; EG-NEXT: NOT_INT T1.W, PV.Z,
-; EG-NEXT: LSHR * T3.W, PV.Y, 1,
-; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT: BIT_ALIGN_INT T3.X, 0.0, PS, PV.W,
-; EG-NEXT: LSHL T3.Y, T1.Y, PV.Z,
-; EG-NEXT: XOR_INT T1.Z, PV.X, PV.Y,
-; EG-NEXT: XOR_INT T1.W, T2.X, PV.Y,
-; EG-NEXT: SUB_INT * T2.W, literal.x, T2.W,
-; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.X, T0.Z, literal.x,
-; EG-NEXT: AND_INT T4.Y, PS, literal.x,
-; EG-NEXT: BIT_ALIGN_INT T0.Z, 0.0, T1.Y, PS, BS:VEC_021/SCL_122
-; EG-NEXT: SUB_INT T1.W, PV.W, T2.Y,
-; EG-NEXT: SUBB_UINT * T2.W, PV.Z, T2.Y,
-; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
-; EG-NEXT: SUB_INT T2.X, PV.W, PS,
-; EG-NEXT: CNDE_INT T1.Y, PV.Y, PV.Z, 0.0,
-; EG-NEXT: CNDE_INT T0.Z, PV.X, T3.Y, 0.0,
-; EG-NEXT: CNDE_INT T1.W, PV.X, T3.X, T3.Y, BS:VEC_021/SCL_122
-; EG-NEXT: SETGT_INT * T2.W, T0.Y, literal.x,
+; EG-NEXT: CNDE_INT T3.Y, PS, 0.0, PV.W,
+; EG-NEXT: SUB_INT T2.Z, literal.x, T1.W,
+; EG-NEXT: LSHL T1.W, PV.Z, PV.Y,
+; EG-NEXT: AND_INT * T3.W, T4.W, literal.y,
+; EG-NEXT: 150(2.101948e-43), 32(4.484155e-44)
+; EG-NEXT: CNDE_INT T1.X, PS, PV.W, 0.0,
+; EG-NEXT: AND_INT T2.Y, PV.Z, literal.x,
+; EG-NEXT: SUB_INT T3.Z, literal.y, T0.Y,
+; EG-NEXT: NOT_INT T4.W, T4.W,
+; EG-NEXT: LSHR * T6.W, T1.Z, 1,
+; EG-NEXT: 32(4.484155e-44), 150(2.101948e-43)
+; EG-NEXT: BIT_ALIGN_INT T2.X, 0.0, T2.W, T2.Z,
+; EG-NEXT: ADD_INT T0.Y, T0.Y, literal.x,
+; EG-NEXT: BIT_ALIGN_INT T2.Z, 0.0, PS, PV.W,
+; EG-NEXT: BIT_ALIGN_INT T2.W, 0.0, T1.Z, PV.Z,
+; EG-NEXT: AND_INT * T4.W, PV.Z, literal.y,
+; EG-NEXT: -127(nan), 32(4.484155e-44)
+; EG-NEXT: CNDE_INT T3.X, PS, PV.W, 0.0,
+; EG-NEXT: CNDE_INT T4.Y, T3.W, PV.Z, T1.W,
+; EG-NEXT: SETGT_INT T1.Z, PV.Y, literal.x,
+; EG-NEXT: CNDE_INT T1.W, T1.Y, T0.Z, 0.0,
+; EG-NEXT: CNDE_INT * T2.W, T2.Y, PV.X, 0.0,
; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT T1.X, KC0[3].W, literal.x, T0.W,
-; EG-NEXT: AND_INT T3.Y, KC0[3].W, literal.y,
-; EG-NEXT: CNDE_INT T2.Z, PS, 0.0, PV.W,
-; EG-NEXT: CNDE_INT T1.W, PS, PV.Y, PV.Z,
-; EG-NEXT: ASHR * T2.W, KC0[3].Z, literal.z,
-; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38)
+; EG-NEXT: CNDE_INT T2.X, T5.W, PS, PV.W,
+; EG-NEXT: ASHR T1.Y, KC0[3].Z, literal.x,
+; EG-NEXT: CNDE_INT T0.Z, PV.Z, 0.0, PV.Y,
+; EG-NEXT: CNDE_INT T1.W, PV.Z, PV.X, T1.X,
+; EG-NEXT: ASHR * T2.W, KC0[4].X, literal.x,
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT T3.X, KC0[3].Y, literal.x, T0.W,
-; EG-NEXT: XOR_INT T1.Y, PV.W, PS,
+; EG-NEXT: XOR_INT T2.Y, PV.W, PS,
; EG-NEXT: XOR_INT T0.Z, PV.Z, PS,
-; EG-NEXT: OR_INT T0.W, PV.Y, literal.y,
-; EG-NEXT: SUB_INT * T1.W, literal.z, PV.X,
-; EG-NEXT: 23(3.222986e-44), 8388608(1.175494e-38)
+; EG-NEXT: XOR_INT T1.W, PV.X, PV.Y,
+; EG-NEXT: XOR_INT * T3.W, T3.Y, PV.Y,
+; EG-NEXT: SUB_INT T3.Y, PS, T1.Y,
+; EG-NEXT: SUBB_UINT T1.Z, PV.W, T1.Y,
+; EG-NEXT: SUB_INT T3.W, PV.Z, T2.W,
+; EG-NEXT: SUBB_UINT * T4.W, PV.Y, T2.W,
+; EG-NEXT: SUB_INT T4.Y, PV.W, PS,
+; EG-NEXT: SUB_INT T0.Z, PV.Y, PV.Z,
+; EG-NEXT: BFE_UINT T3.W, KC0[3].Y, literal.x, T0.W,
+; EG-NEXT: AND_INT * T4.W, KC0[3].Y, literal.y,
+; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38)
+; EG-NEXT: SETGT_INT T0.X, 0.0, T0.X,
+; EG-NEXT: ADD_INT T3.Y, PV.W, literal.x,
+; EG-NEXT: OR_INT T1.Z, PS, literal.y,
+; EG-NEXT: BFE_UINT T0.W, KC0[3].W, literal.z, T0.W,
+; EG-NEXT: ADD_INT * T4.W, PV.W, literal.w,
+; EG-NEXT: -127(nan), 8388608(1.175494e-38)
+; EG-NEXT: 23(3.222986e-44), -150(nan)
+; EG-NEXT: AND_INT T1.X, KC0[3].W, literal.x,
+; EG-NEXT: ADD_INT T5.Y, PV.W, literal.y,
+; EG-NEXT: SUB_INT T2.Z, literal.z, T3.W,
+; EG-NEXT: NOT_INT T3.W, PS,
+; EG-NEXT: LSHR * T5.W, PV.Z, 1,
+; EG-NEXT: 8388607(1.175494e-38), -150(nan)
; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00)
-; EG-NEXT: AND_INT T4.X, KC0[3].Y, literal.x,
-; EG-NEXT: AND_INT T3.Y, PS, literal.y,
-; EG-NEXT: BIT_ALIGN_INT T2.Z, 0.0, PV.W, PS,
-; EG-NEXT: SUB_INT T1.W, PV.Z, T2.W,
-; EG-NEXT: SUBB_UINT * T3.W, PV.Y, T2.W,
-; EG-NEXT: 8388607(1.175494e-38), 32(4.484155e-44)
-; EG-NEXT: SUB_INT T5.X, PV.W, PS,
-; EG-NEXT: SETGT_INT T0.Y, 0.0, T0.Y,
-; EG-NEXT: CNDE_INT T0.Z, PV.Y, PV.Z, 0.0,
-; EG-NEXT: OR_INT T1.W, PV.X, literal.x,
-; EG-NEXT: ADD_INT * T3.W, T3.X, literal.y,
-; EG-NEXT: 8388608(1.175494e-38), -150(nan)
-; EG-NEXT: ADD_INT T4.X, T3.X, literal.x,
-; EG-NEXT: SUB_INT T3.Y, literal.y, T3.X,
-; EG-NEXT: AND_INT T2.Z, PS, literal.z,
-; EG-NEXT: NOT_INT T4.W, PS,
-; EG-NEXT: LSHR * T5.W, PV.W, 1,
-; EG-NEXT: -127(nan), 150(2.101948e-43)
-; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT: BIT_ALIGN_INT T3.X, 0.0, PS, PV.W,
-; EG-NEXT: LSHL T4.Y, T1.W, PV.Z,
-; EG-NEXT: AND_INT T2.Z, T3.W, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT: BIT_ALIGN_INT T1.W, 0.0, T1.W, PV.Y, BS:VEC_021/SCL_122
-; EG-NEXT: AND_INT * T3.W, PV.Y, literal.x,
+; EG-NEXT: BIT_ALIGN_INT T2.X, 0.0, PS, PV.W,
+; EG-NEXT: AND_INT T6.Y, PV.Z, literal.x,
+; EG-NEXT: AND_INT T3.Z, PV.Y, literal.y,
+; EG-NEXT: OR_INT T3.W, PV.X, literal.z,
+; EG-NEXT: AND_INT * T5.W, T4.W, literal.y,
+; EG-NEXT: 32(4.484155e-44), 31(4.344025e-44)
+; EG-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
+; EG-NEXT: BIT_ALIGN_INT T1.X, 0.0, T1.Z, T2.Z,
+; EG-NEXT: LSHL T7.Y, T1.Z, PS,
+; EG-NEXT: AND_INT T1.Z, T4.W, literal.x,
+; EG-NEXT: LSHL T4.W, PV.W, PV.Z,
+; EG-NEXT: AND_INT * T5.W, T5.Y, literal.x,
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
-; EG-NEXT: ADD_INT T6.X, T1.X, literal.x,
-; EG-NEXT: CNDE_INT T3.Y, PS, PV.W, 0.0,
-; EG-NEXT: CNDE_INT * T3.Z, PV.Z, PV.Y, 0.0,
-; EG-NEXT: -150(nan), 0(0.000000e+00)
-; EG-NEXT: ALU clause starting at 108:
-; EG-NEXT: CNDE_INT T1.W, T2.Z, T3.X, T4.Y,
-; EG-NEXT: SETGT_INT * T3.W, T4.X, literal.x,
+; EG-NEXT: CNDE_INT T3.X, PS, PV.W, 0.0,
+; EG-NEXT: CNDE_INT T8.Y, PV.Z, PV.Y, 0.0,
+; EG-NEXT: CNDE_INT * T2.Z, T6.Y, PV.X, 0.0,
+; EG-NEXT: ALU clause starting at 106:
+; EG-NEXT: CNDE_INT T6.W, T1.Z, T2.X, T7.Y, BS:VEC_021/SCL_122
+; EG-NEXT: SETGT_INT * T7.W, T3.Y, literal.x,
; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
-; EG-NEXT: CNDE_INT T3.X, PS, 0.0, PV.W,
-; EG-NEXT: CNDE_INT T3.Y, PS, T3.Y, T3.Z,
-; EG-NEXT: AND_INT T2.Z, T6.X, literal.x,
-; EG-NEXT: NOT_INT T1.W, T6.X,
-; EG-NEXT: LSHR * T3.W, T0.W, 1,
-; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT: ASHR T7.X, KC0[3].Y, literal.x,
-; EG-NEXT: ADD_INT T4.Y, T1.X, literal.y,
-; EG-NEXT: BIT_ALIGN_INT T3.Z, 0.0, PS, PV.W,
-; EG-NEXT: LSHL T0.W, T0.W, PV.Z,
-; EG-NEXT: AND_INT * T1.W, T6.X, literal.z,
+; EG-NEXT: CNDE_INT T1.X, PS, 0.0, PV.W,
+; EG-NEXT: CNDE_INT T6.Y, PS, T2.Z, T8.Y,
+; EG-NEXT: SUB_INT T1.Z, literal.x, T0.W,
+; EG-NEXT: NOT_INT T6.W, T5.Y,
+; EG-NEXT: LSHR * T7.W, T3.W, 1,
+; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00)
+; EG-NEXT: ASHR T2.X, KC0[3].Y, literal.x,
+; EG-NEXT: ADD_INT T5.Y, T0.W, literal.y,
+; EG-NEXT: BIT_ALIGN_INT T2.Z, 0.0, PS, PV.W,
+; EG-NEXT: BIT_ALIGN_INT T0.W, 0.0, T3.W, PV.Z,
+; EG-NEXT: AND_INT * T3.W, PV.Z, literal.z,
; EG-NEXT: 31(4.344025e-44), -127(nan)
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
-; EG-NEXT: CNDE_INT T1.X, PS, PV.W, 0.0,
-; EG-NEXT: CNDE_INT T5.Y, PS, PV.Z, PV.W,
-; EG-NEXT: SETGT_INT T2.Z, PV.Y, literal.x,
-; EG-NEXT: XOR_INT T0.W, T3.Y, PV.X,
-; EG-NEXT: XOR_INT * T1.W, T3.X, PV.X,
+; EG-NEXT: CNDE_INT T4.X, PS, PV.W, 0.0,
+; EG-NEXT: CNDE_INT T7.Y, T5.W, PV.Z, T4.W,
+; EG-NEXT: SETGT_INT T1.Z, PV.Y, literal.x,
+; EG-NEXT: XOR_INT T0.W, T6.Y, PV.X,
+; EG-NEXT: XOR_INT * T3.W, T1.X, PV.X,
; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
-; EG-NEXT: SUB_INT T3.X, PS, T7.X,
-; EG-NEXT: SUBB_UINT T3.Y, PV.W, T7.X,
-; EG-NEXT: CNDE_INT T3.Z, PV.Z, 0.0, PV.Y,
-; EG-NEXT: CNDE_INT T1.W, PV.Z, T0.Z, PV.X,
-; EG-NEXT: ASHR * T3.W, KC0[3].W, literal.x,
+; EG-NEXT: SUB_INT T1.X, PS, T2.X,
+; EG-NEXT: SUBB_UINT T6.Y, PV.W, T2.X,
+; EG-NEXT: CNDE_INT T2.Z, PV.Z, 0.0, PV.Y,
+; EG-NEXT: CNDE_INT T3.W, PV.Z, PV.X, T3.X,
+; EG-NEXT: ASHR * T4.W, KC0[3].W, literal.x,
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT: XOR_INT T1.X, PV.W, PS,
-; EG-NEXT: XOR_INT T5.Y, PV.Z, PS,
-; EG-NEXT: SUB_INT T0.Z, PV.X, PV.Y,
-; EG-NEXT: SETGT_INT T1.W, 0.0, T4.X, BS:VEC_021/SCL_122
-; EG-NEXT: CNDE_INT * T6.W, T0.Y, T5.X, 0.0,
-; EG-NEXT: SETGT_INT T0.X, 0.0, T0.X,
+; EG-NEXT: XOR_INT T3.X, PV.W, PS,
+; EG-NEXT: XOR_INT T7.Y, PV.Z, PS,
+; EG-NEXT: SUB_INT T1.Z, PV.X, PV.Y,
+; EG-NEXT: SETGT_INT T3.W, 0.0, T3.Y,
+; EG-NEXT: CNDE_INT * T6.W, T0.X, T0.Z, 0.0,
+; EG-NEXT: SETGT_INT T1.X, 0.0, T0.Y,
; EG-NEXT: CNDE_INT T6.Y, PV.W, PV.Z, 0.0,
-; EG-NEXT: SUB_INT T0.Z, T1.Y, T2.W, BS:VEC_021/SCL_122
-; EG-NEXT: SUB_INT T2.W, PV.Y, T3.W,
-; EG-NEXT: SUBB_UINT * T4.W, PV.X, T3.W,
-; EG-NEXT: SUB_INT T3.X, PV.W, PS,
-; EG-NEXT: SETGT_INT T1.Y, 0.0, T4.Y,
-; EG-NEXT: CNDE_INT T6.Z, T0.Y, PV.Z, 0.0,
-; EG-NEXT: SUB_INT T0.W, T0.W, T7.X, BS:VEC_021/SCL_122
-; EG-NEXT: CNDE_INT * T4.W, PV.X, T2.X, 0.0,
-; EG-NEXT: CNDE_INT T6.X, T1.W, PV.W, 0.0,
-; EG-NEXT: CNDE_INT T4.Y, PV.Y, PV.X, 0.0,
-; EG-NEXT: SUB_INT T0.W, T1.Z, T2.Y,
-; EG-NEXT: LSHR * T2.X, KC0[2].Y, literal.x,
+; EG-NEXT: SUB_INT T0.Z, T1.W, T1.Y, BS:VEC_021/SCL_122
+; EG-NEXT: SUB_INT T1.W, PV.Y, T4.W,
+; EG-NEXT: SUBB_UINT * T5.W, PV.X, T4.W,
+; EG-NEXT: SUB_INT T4.X, PV.W, PS,
+; EG-NEXT: SETGT_INT T0.Y, 0.0, T5.Y, BS:VEC_021/SCL_122
+; EG-NEXT: CNDE_INT T6.Z, T0.X, PV.Z, 0.0,
+; EG-NEXT: SUB_INT T0.W, T0.W, T2.X,
+; EG-NEXT: CNDE_INT * T1.W, PV.X, T4.Y, 0.0,
+; EG-NEXT: CNDE_INT T6.X, T3.W, PV.W, 0.0,
+; EG-NEXT: CNDE_INT T1.Y, PV.Y, PV.X, 0.0,
+; EG-NEXT: SUB_INT T0.W, T2.Y, T2.W,
+; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT: CNDE_INT T4.Z, T0.X, PV.W, 0.0,
-; EG-NEXT: SUB_INT * T0.W, T1.X, T3.W, BS:VEC_120/SCL_212
-; EG-NEXT: CNDE_INT T4.X, T1.Y, PV.W, 0.0,
+; EG-NEXT: CNDE_INT T1.Z, T1.X, PV.W, 0.0,
+; EG-NEXT: SUB_INT * T0.W, T3.X, T4.W, BS:VEC_120/SCL_212
+; EG-NEXT: CNDE_INT T1.X, T0.Y, PV.W, 0.0,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR * T0.X, PV.W, literal.x,
+; EG-NEXT: LSHR * T2.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%conv = fptoui <4 x float> %x to <4 x i64>
store <4 x i64> %conv, ptr addrspace(1) %out
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
index 7a0450761e1f1..3a867879bb809 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
@@ -228,23 +228,23 @@ define amdgpu_kernel void @s_exp_f32(ptr addrspace(1) %out, float %in) {
; R600-NEXT: MUL_IEEE * T2.W, PS, literal.z,
; R600-NEXT: -127(nan), 254(3.559298e-43)
; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T3.X, T1.X, literal.x,
-; R600-NEXT: MUL_IEEE T0.Y, PS, literal.y,
+; R600-NEXT: MUL_IEEE T3.X, PS, literal.x,
+; R600-NEXT: MUL_IEEE T0.Y, T1.X, literal.y,
; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Z, T0.Z,
; R600-NEXT: CNDE_INT T3.W, PV.Y, PV.X, T0.X,
; R600-NEXT: SETGT_INT * T4.W, T0.Z, literal.z,
-; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31)
+; R600-NEXT: 209715200(1.972152e-31), 2130706432(1.701412e+38)
; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00)
; R600-NEXT: CNDE_INT T0.Z, PS, PV.Z, PV.W,
-; R600-NEXT: CNDE_INT T0.W, T0.W, PV.Y, T2.W,
-; R600-NEXT: MUL_IEEE * T2.W, PV.X, literal.x,
+; R600-NEXT: MUL_IEEE T3.W, PV.Y, literal.x,
+; R600-NEXT: CNDE_INT * T0.W, T0.W, PV.X, T2.W,
; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
-; R600-NEXT: CNDE_INT T1.Z, T1.Y, T3.X, PS,
-; R600-NEXT: CNDE_INT T0.W, T1.W, PV.W, T1.X,
+; R600-NEXT: CNDE_INT T1.Z, T1.W, PS, T1.X,
+; R600-NEXT: CNDE_INT T0.W, T1.Y, T0.Y, PV.W,
; R600-NEXT: LSHL * T1.W, PV.Z, literal.x,
; R600-NEXT: 23(3.222986e-44), 0(0.000000e+00)
; R600-NEXT: ADD_INT T1.W, PS, literal.x,
-; R600-NEXT: CNDE_INT * T0.W, T4.W, PV.W, PV.Z,
+; R600-NEXT: CNDE_INT * T0.W, T4.W, PV.Z, PV.W,
; R600-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00)
; R600-NEXT: MUL_IEEE T0.W, PS, PV.W,
; R600-NEXT: SETGT * T1.W, literal.x, KC0[2].Z,
@@ -258,65 +258,63 @@ define amdgpu_kernel void @s_exp_f32(ptr addrspace(1) %out, float %in) {
;
; CM-LABEL: s_exp_f32:
; CM: ; %bb.0:
-; CM-NEXT: ALU 64, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: ALU 62, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: ALU clause starting at 4:
; CM-NEXT: AND_INT * T0.W, KC0[2].Z, literal.x,
; CM-NEXT: -4096(nan), 0(0.000000e+00)
-; CM-NEXT: MUL_IEEE T0.Z, PV.W, literal.x,
; CM-NEXT: ADD * T1.W, KC0[2].Z, -PV.W,
-; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
-; CM-NEXT: MUL_IEEE T1.Z, PV.W, literal.x,
-; CM-NEXT: RNDNE * T2.W, PV.Z,
-; CM-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
-; CM-NEXT: TRUNC T2.Z, PV.W,
+; CM-NEXT: MUL_IEEE T0.Z, PV.W, literal.x,
+; CM-NEXT: MUL_IEEE * T2.W, T0.W, literal.y,
+; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
+; CM-NEXT: RNDNE T1.Z, PV.W,
; CM-NEXT: MULADD_IEEE * T1.W, T1.W, literal.x, PV.Z,
; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
-; CM-NEXT: MULADD_IEEE T0.Y, T0.W, literal.x, PV.W,
-; CM-NEXT: ADD T0.Z, T0.Z, -T2.W,
-; CM-NEXT: FLT_TO_INT * T0.W, PV.Z,
+; CM-NEXT: MULADD_IEEE T0.Z, T0.W, literal.x, PV.W,
+; CM-NEXT: ADD * T0.W, T2.W, -PV.Z, BS:VEC_120/SCL_212
; CM-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
-; CM-NEXT: MIN_INT T1.Z, PV.W, literal.x,
-; CM-NEXT: ADD * T1.W, PV.Z, PV.Y,
+; CM-NEXT: TRUNC T1.Z, T1.Z,
+; CM-NEXT: ADD * T0.W, PV.W, PV.Z,
+; CM-NEXT: EXP_IEEE T0.X, T0.W,
+; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
+; CM-NEXT: FLT_TO_INT T0.Z, T1.Z,
+; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.x,
+; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
+; CM-NEXT: MUL_IEEE T0.Y, PV.W, literal.x,
+; CM-NEXT: MAX_INT T1.Z, PV.Z, literal.y,
+; CM-NEXT: MIN_INT * T1.W, PV.Z, literal.z,
+; CM-NEXT: 209715200(1.972152e-31), -330(nan)
; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00)
-; CM-NEXT: EXP_IEEE T0.X, T1.W,
-; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W,
-; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W,
-; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W,
-; CM-NEXT: MUL_IEEE T0.Y, PV.X, literal.x,
-; CM-NEXT: ADD_INT T0.Z, T1.Z, literal.y,
-; CM-NEXT: MAX_INT * T1.W, T0.W, literal.z,
-; CM-NEXT: 2130706432(1.701412e+38), -254(nan)
-; CM-NEXT: -330(nan), 0(0.000000e+00)
-; CM-NEXT: ADD_INT T1.X, T0.W, literal.x,
-; CM-NEXT: ADD_INT T1.Y, PV.W, literal.y,
-; CM-NEXT: ADD_INT T1.Z, T0.W, literal.z,
-; CM-NEXT: SETGT_UINT * T1.W, T0.W, literal.w,
-; CM-NEXT: -127(nan), 204(2.858649e-43)
+; CM-NEXT: ADD_INT T1.X, PV.W, literal.x,
+; CM-NEXT: ADD_INT T1.Y, PV.Z, literal.y,
+; CM-NEXT: ADD_INT T1.Z, T0.Z, literal.z,
+; CM-NEXT: SETGT_UINT * T1.W, T0.Z, literal.w,
+; CM-NEXT: -254(nan), 204(2.858649e-43)
; CM-NEXT: 102(1.429324e-43), -229(nan)
-; CM-NEXT: SETGT_UINT T2.X, T0.W, literal.x,
-; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
-; CM-NEXT: SETGT_INT T1.Z, T0.W, literal.y,
-; CM-NEXT: MUL_IEEE * T2.W, T0.X, literal.z,
-; CM-NEXT: 254(3.559298e-43), -127(nan)
-; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
-; CM-NEXT: MUL_IEEE T3.X, PV.W, literal.x,
-; CM-NEXT: CNDE_INT T1.Y, PV.Z, PV.Y, T0.W,
-; CM-NEXT: CNDE_INT T0.Z, PV.X, T1.X, T0.Z,
-; CM-NEXT: SETGT_INT * T0.W, T0.W, literal.y,
-; CM-NEXT: 209715200(1.972152e-31), 127(1.779649e-43)
+; CM-NEXT: ADD_INT T2.X, T0.Z, literal.x,
+; CM-NEXT: SETGT_UINT T2.Y, T0.Z, literal.y,
+; CM-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
+; CM-NEXT: SETGT_INT * T2.W, T0.Z, literal.x,
+; CM-NEXT: -127(nan), 254(3.559298e-43)
+; CM-NEXT: MUL_IEEE T3.X, T0.X, literal.x,
+; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Z, T0.Z,
+; CM-NEXT: CNDE_INT T1.Z, PV.Y, PV.X, T1.X,
+; CM-NEXT: SETGT_INT * T3.W, T0.Z, literal.y,
+; CM-NEXT: 2130706432(1.701412e+38), 127(1.779649e-43)
; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
-; CM-NEXT: CNDE_INT T0.Z, T1.W, PV.X, T2.W,
-; CM-NEXT: MUL_IEEE * T1.W, T0.Y, literal.x,
+; CM-NEXT: MUL_IEEE T0.Z, PV.X, literal.x,
+; CM-NEXT: CNDE_INT * T0.W, T1.W, T0.Y, T0.W,
; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
-; CM-NEXT: CNDE_INT T0.Y, T2.X, T0.Y, PV.W,
-; CM-NEXT: CNDE_INT T0.Z, T1.Z, PV.Z, T0.X,
-; CM-NEXT: LSHL * T1.W, PV.Y, literal.x,
+; CM-NEXT: CNDE_INT T0.Y, T2.W, PV.W, T0.X,
+; CM-NEXT: CNDE_INT T0.Z, T2.Y, T3.X, PV.Z,
+; CM-NEXT: LSHL * T0.W, PV.Y, literal.x,
; CM-NEXT: 23(3.222986e-44), 0(0.000000e+00)
; CM-NEXT: ADD_INT T1.Z, PV.W, literal.x,
-; CM-NEXT: CNDE_INT * T0.W, T0.W, PV.Z, PV.Y,
+; CM-NEXT: CNDE_INT * T0.W, T3.W, PV.Y, PV.Z,
; CM-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00)
; CM-NEXT: MUL_IEEE T0.Z, PV.W, PV.Z,
; CM-NEXT: SETGT * T0.W, literal.x, KC0[2].Z,
@@ -610,105 +608,105 @@ define amdgpu_kernel void @s_exp_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
; R600-NEXT: AND_INT * T0.W, KC0[3].X, literal.x,
; R600-NEXT: -4096(nan), 0(0.000000e+00)
; R600-NEXT: ADD * T1.W, KC0[3].X, -PV.W,
-; R600-NEXT: AND_INT T0.Z, KC0[2].W, literal.x,
-; R600-NEXT: MUL_IEEE T2.W, PV.W, literal.y,
-; R600-NEXT: MUL_IEEE * T3.W, T0.W, literal.z,
-; R600-NEXT: -4096(nan), 967029397(3.122284e-04)
-; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
-; R600-NEXT: RNDNE T1.Z, PS,
+; R600-NEXT: MUL_IEEE T2.W, PV.W, literal.x,
+; R600-NEXT: MUL_IEEE * T3.W, T0.W, literal.y,
+; R600-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
+; R600-NEXT: RNDNE T0.Z, PS,
; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PV.W,
-; R600-NEXT: ADD * T2.W, KC0[2].W, -PV.Z,
-; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T0.Y, PS, literal.x,
-; R600-NEXT: MUL_IEEE T2.Z, T0.Z, literal.y,
+; R600-NEXT: AND_INT * T2.W, KC0[2].W, literal.y,
+; R600-NEXT: 1069064192(1.442383e+00), -4096(nan)
+; R600-NEXT: ADD T1.Z, KC0[2].W, -PS,
; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.x, PV.W,
; R600-NEXT: ADD * T1.W, T3.W, -PV.Z,
+; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
+; R600-NEXT: ADD T2.Z, PS, PV.W,
+; R600-NEXT: MUL_IEEE T0.W, PV.Z, literal.x,
+; R600-NEXT: MUL_IEEE * T1.W, T2.W, literal.y,
; R600-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
-; R600-NEXT: ADD T3.Z, PS, PV.W,
-; R600-NEXT: RNDNE T0.W, PV.Z,
-; R600-NEXT: MULADD_IEEE * T1.W, T2.W, literal.x, PV.Y, BS:VEC_021/SCL_122
-; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
-; R600-NEXT: TRUNC T0.Y, T1.Z,
-; R600-NEXT: MULADD_IEEE T0.Z, T0.Z, literal.x, PS, BS:VEC_120/SCL_212
-; R600-NEXT: ADD T1.W, T2.Z, -PV.W, BS:VEC_201
+; R600-NEXT: RNDNE T0.Y, PS,
+; R600-NEXT: MULADD_IEEE T1.Z, T1.Z, literal.x, PV.W,
+; R600-NEXT: TRUNC T0.W, T0.Z, BS:VEC_120/SCL_212
; R600-NEXT: EXP_IEEE * T0.X, PV.Z,
-; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
-; R600-NEXT: ADD T0.Z, PV.W, PV.Z,
-; R600-NEXT: FLT_TO_INT T1.W, PV.Y,
-; R600-NEXT: MUL_IEEE * T2.W, PS, literal.x,
-; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T1.Z, PS, literal.x,
-; R600-NEXT: SETGT_UINT T3.W, PV.W, literal.y,
-; R600-NEXT: EXP_IEEE * T0.Y, PV.Z,
-; R600-NEXT: 2130706432(1.701412e+38), 254(3.559298e-43)
-; R600-NEXT: CNDE_INT T1.X, PV.W, T2.W, PV.Z,
-; R600-NEXT: MUL_IEEE T1.Y, PS, literal.x,
-; R600-NEXT: MAX_INT T0.Z, T1.W, literal.y,
-; R600-NEXT: MIN_INT T2.W, T1.W, literal.z,
-; R600-NEXT: TRUNC * T0.W, T0.W,
+; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
+; R600-NEXT: FLT_TO_INT T1.Y, PV.W,
+; R600-NEXT: MUL_IEEE T0.Z, PS, literal.x,
+; R600-NEXT: MULADD_IEEE T0.W, T2.W, literal.y, PV.Z,
+; R600-NEXT: ADD * T1.W, T1.W, -PV.Y,
+; R600-NEXT: 209715200(1.972152e-31), 967029397(3.122284e-04)
+; R600-NEXT: ADD T1.Z, PS, PV.W,
+; R600-NEXT: MUL_IEEE T0.W, PV.Z, literal.x,
+; R600-NEXT: SETGT_UINT * T1.W, PV.Y, literal.y,
+; R600-NEXT: 209715200(1.972152e-31), -229(nan)
+; R600-NEXT: CNDE_INT T0.Z, PS, PV.W, T0.Z,
+; R600-NEXT: SETGT_INT T0.W, T1.Y, literal.x,
+; R600-NEXT: EXP_IEEE * T1.X, PV.Z,
+; R600-NEXT: -127(nan), 0(0.000000e+00)
+; R600-NEXT: CNDE_INT T0.Z, PV.W, PV.Z, T0.X,
+; R600-NEXT: MAX_INT T2.W, T1.Y, literal.x,
+; R600-NEXT: MUL_IEEE * T3.W, PS, literal.y,
+; R600-NEXT: -330(nan), 209715200(1.972152e-31)
+; R600-NEXT: MUL_IEEE T2.X, PS, literal.x,
+; R600-NEXT: ADD_INT T2.Y, PV.W, literal.y,
+; R600-NEXT: ADD_INT T1.Z, T1.Y, literal.z,
+; R600-NEXT: MIN_INT T2.W, T1.Y, literal.w,
+; R600-NEXT: TRUNC * T4.W, T0.Y,
+; R600-NEXT: 209715200(1.972152e-31), 204(2.858649e-43)
+; R600-NEXT: 102(1.429324e-43), 381(5.338947e-43)
+; R600-NEXT: FLT_TO_INT T3.X, PS,
+; R600-NEXT: ADD_INT T0.Y, PV.W, literal.x,
+; R600-NEXT: ADD_INT T2.Z, T1.Y, literal.y,
+; R600-NEXT: SETGT_UINT T2.W, T1.Y, literal.z,
+; R600-NEXT: CNDE_INT * T1.W, T1.W, PV.Y, PV.Z,
+; R600-NEXT: -254(nan), -127(nan)
+; R600-NEXT: 254(3.559298e-43), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T4.X, T1.X, literal.x,
+; R600-NEXT: MUL_IEEE T2.Y, T0.X, literal.x, BS:VEC_120/SCL_212
+; R600-NEXT: CNDE_INT T1.Z, T0.W, PS, T1.Y,
+; R600-NEXT: CNDE_INT T0.W, PV.W, PV.Z, PV.Y,
+; R600-NEXT: MAX_INT * T1.W, PV.X, literal.y,
; R600-NEXT: 2130706432(1.701412e+38), -330(nan)
-; R600-NEXT: 381(5.338947e-43), 0(0.000000e+00)
-; R600-NEXT: FLT_TO_INT T2.X, PS,
-; R600-NEXT: ADD_INT T2.Y, PV.W, literal.x,
-; R600-NEXT: ADD_INT T0.Z, PV.Z, literal.y,
-; R600-NEXT: ADD_INT T0.W, T1.W, literal.z,
-; R600-NEXT: SETGT_UINT * T2.W, T1.W, literal.w,
-; R600-NEXT: -254(nan), 204(2.858649e-43)
-; R600-NEXT: 102(1.429324e-43), -229(nan)
-; R600-NEXT: ADD_INT T3.X, T1.W, literal.x,
-; R600-NEXT: CNDE_INT T3.Y, PS, PV.Z, PV.W,
-; R600-NEXT: SETGT_INT T0.Z, T1.W, literal.x,
-; R600-NEXT: MUL_IEEE T0.W, T0.X, literal.y,
-; R600-NEXT: MUL_IEEE * T4.W, T0.Y, literal.y,
-; R600-NEXT: -127(nan), 209715200(1.972152e-31)
-; R600-NEXT: MUL_IEEE T4.X, PS, literal.x,
-; R600-NEXT: MUL_IEEE T4.Y, PV.W, literal.x,
-; R600-NEXT: CNDE_INT T1.Z, PV.Z, PV.Y, T1.W,
-; R600-NEXT: CNDE_INT T3.W, T3.W, PV.X, T2.Y,
-; R600-NEXT: MAX_INT * T5.W, T2.X, literal.y,
-; R600-NEXT: 209715200(1.972152e-31), -330(nan)
-; R600-NEXT: SETGT_INT T3.X, T1.W, literal.x,
-; R600-NEXT: ADD_INT T2.Y, PS, literal.y,
-; R600-NEXT: ADD_INT T2.Z, T2.X, literal.z,
-; R600-NEXT: SETGT_UINT * T1.W, T2.X, literal.w,
+; R600-NEXT: SETGT_INT T0.X, T1.Y, literal.x,
+; R600-NEXT: ADD_INT T0.Y, PS, literal.y,
+; R600-NEXT: ADD_INT T2.Z, T3.X, literal.z,
+; R600-NEXT: SETGT_UINT * T1.W, T3.X, literal.w,
; R600-NEXT: 127(1.779649e-43), 204(2.858649e-43)
; R600-NEXT: 102(1.429324e-43), -229(nan)
-; R600-NEXT: MIN_INT * T5.W, T2.X, literal.x,
+; R600-NEXT: MIN_INT * T4.W, T3.X, literal.x,
; R600-NEXT: 381(5.338947e-43), 0(0.000000e+00)
; R600-NEXT: ADD_INT T5.X, PV.W, literal.x,
-; R600-NEXT: ADD_INT T3.Y, T2.X, literal.y,
-; R600-NEXT: SETGT_UINT T3.Z, T2.X, literal.z,
-; R600-NEXT: CNDE_INT T5.W, T1.W, T2.Y, T2.Z,
-; R600-NEXT: SETGT_INT * T6.W, T2.X, literal.y,
+; R600-NEXT: ADD_INT T1.Y, T3.X, literal.y,
+; R600-NEXT: SETGT_UINT T3.Z, T3.X, literal.z,
+; R600-NEXT: CNDE_INT T4.W, T1.W, T0.Y, T2.Z,
+; R600-NEXT: SETGT_INT * T5.W, T3.X, literal.y,
; R600-NEXT: -254(nan), -127(nan)
; R600-NEXT: 254(3.559298e-43), 0(0.000000e+00)
-; R600-NEXT: CNDE_INT T6.X, PS, PV.W, T2.X,
-; R600-NEXT: CNDE_INT T2.Y, PV.Z, PV.Y, PV.X,
-; R600-NEXT: SETGT_INT T2.Z, T2.X, literal.x, BS:VEC_120/SCL_212
-; R600-NEXT: CNDE_INT T3.W, T3.X, T1.Z, T3.W, BS:VEC_021/SCL_122
-; R600-NEXT: CNDE_INT * T0.W, T2.W, T4.Y, T0.W,
-; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00)
-; R600-NEXT: CNDE_INT T0.X, T0.Z, PS, T0.X,
-; R600-NEXT: LSHL T3.Y, PV.W, literal.x,
-; R600-NEXT: CNDE_INT T0.Z, PV.Z, PV.X, PV.Y,
-; R600-NEXT: CNDE_INT T0.W, T1.W, T4.X, T4.W,
-; R600-NEXT: MUL_IEEE * T1.W, T1.Y, literal.y,
+; R600-NEXT: CNDE_INT T6.X, PS, PV.W, T3.X,
+; R600-NEXT: CNDE_INT T0.Y, PV.Z, PV.Y, PV.X,
+; R600-NEXT: SETGT_INT T2.Z, T3.X, literal.x,
+; R600-NEXT: CNDE_INT T0.W, T0.X, T1.Z, T0.W, BS:VEC_120/SCL_212
+; R600-NEXT: MUL_IEEE * T4.W, T2.Y, literal.y,
+; R600-NEXT: 127(1.779649e-43), 2130706432(1.701412e+38)
+; R600-NEXT: CNDE_INT T3.X, T2.W, T2.Y, PS, BS:VEC_120/SCL_212
+; R600-NEXT: LSHL T1.Y, PV.W, literal.x,
+; R600-NEXT: CNDE_INT T1.Z, PV.Z, PV.X, PV.Y,
+; R600-NEXT: MUL_IEEE T0.W, T4.X, literal.y,
+; R600-NEXT: CNDE_INT * T1.W, T1.W, T2.X, T3.W,
; R600-NEXT: 23(3.222986e-44), 2130706432(1.701412e+38)
-; R600-NEXT: CNDE_INT T2.X, T3.Z, T1.Y, PS,
-; R600-NEXT: CNDE_INT T0.Y, T6.W, PV.W, T0.Y,
-; R600-NEXT: LSHL T0.Z, PV.Z, literal.x,
+; R600-NEXT: CNDE_INT T1.X, T5.W, PS, T1.X, BS:VEC_021/SCL_122
+; R600-NEXT: CNDE_INT T0.Y, T3.Z, T4.X, PV.W, BS:VEC_201
+; R600-NEXT: LSHL T1.Z, PV.Z, literal.x,
; R600-NEXT: ADD_INT T0.W, PV.Y, literal.y,
-; R600-NEXT: CNDE_INT * T1.W, T3.X, PV.X, T1.X,
+; R600-NEXT: CNDE_INT * T1.W, T0.X, T0.Z, PV.X,
; R600-NEXT: 23(3.222986e-44), 1065353216(1.000000e+00)
; R600-NEXT: MUL_IEEE T1.Y, PS, PV.W,
-; R600-NEXT: SETGT T1.Z, literal.x, KC0[3].X,
+; R600-NEXT: SETGT T0.Z, literal.x, KC0[3].X,
; R600-NEXT: ADD_INT * T0.W, PV.Z, literal.y,
; R600-NEXT: -1026650416(-1.032789e+02), 1065353216(1.000000e+00)
; R600-NEXT: ALU clause starting at 101:
-; R600-NEXT: CNDE_INT * T1.W, T2.Z, T0.Y, T2.X,
+; R600-NEXT: CNDE_INT * T1.W, T2.Z, T1.X, T0.Y,
; R600-NEXT: MUL_IEEE T0.Y, PV.W, T0.W,
-; R600-NEXT: SETGT T0.Z, literal.x, KC0[2].W,
-; R600-NEXT: CNDE T0.W, T1.Z, T1.Y, 0.0,
+; R600-NEXT: SETGT T1.Z, literal.x, KC0[2].W,
+; R600-NEXT: CNDE T0.W, T0.Z, T1.Y, 0.0,
; R600-NEXT: SETGT * T1.W, KC0[3].X, literal.y,
; R600-NEXT: -1026650416(-1.032789e+02), 1118925336(8.872284e+01)
; R600-NEXT: CNDE T1.Y, PS, PV.W, literal.x,
@@ -721,118 +719,116 @@ define amdgpu_kernel void @s_exp_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
;
; CM-LABEL: s_exp_v2f32:
; CM: ; %bb.0:
-; CM-NEXT: ALU 100, @4, KC0[CB0:0-32], KC1[]
-; CM-NEXT: ALU 18, @105, KC0[CB0:0-32], KC1[]
+; CM-NEXT: ALU 98, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: ALU 18, @103, KC0[CB0:0-32], KC1[]
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
; CM-NEXT: CF_END
; CM-NEXT: ALU clause starting at 4:
; CM-NEXT: AND_INT * T0.W, KC0[2].W, literal.x,
; CM-NEXT: -4096(nan), 0(0.000000e+00)
-; CM-NEXT: MUL_IEEE T0.Z, PV.W, literal.x,
; CM-NEXT: ADD * T1.W, KC0[2].W, -PV.W,
+; CM-NEXT: MUL_IEEE T0.Y, PV.W, literal.x,
+; CM-NEXT: MUL_IEEE T0.Z, T0.W, literal.y,
+; CM-NEXT: AND_INT * T2.W, KC0[3].X, literal.z,
+; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
+; CM-NEXT: -4096(nan), 0(0.000000e+00)
+; CM-NEXT: ADD T1.Y, KC0[3].X, -PV.W,
+; CM-NEXT: RNDNE T1.Z, PV.Z,
+; CM-NEXT: MULADD_IEEE * T1.W, T1.W, literal.x, PV.Y,
; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
-; CM-NEXT: MUL_IEEE T1.Z, PV.W, literal.x,
-; CM-NEXT: RNDNE * T2.W, PV.Z,
-; CM-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
-; CM-NEXT: TRUNC T0.Y, PV.W,
-; CM-NEXT: AND_INT T2.Z, KC0[3].X, literal.x,
-; CM-NEXT: MULADD_IEEE * T1.W, T1.W, literal.y, PV.Z,
-; CM-NEXT: -4096(nan), 1069064192(1.442383e+00)
; CM-NEXT: MULADD_IEEE T0.X, T0.W, literal.x, PV.W,
-; CM-NEXT: MUL_IEEE T1.Y, PV.Z, literal.y,
-; CM-NEXT: FLT_TO_INT T1.Z, PV.Y,
-; CM-NEXT: ADD * T0.W, KC0[3].X, -PV.Z,
+; CM-NEXT: ADD T0.Y, T0.Z, -PV.Z,
+; CM-NEXT: MUL_IEEE T0.Z, PV.Y, literal.x,
+; CM-NEXT: MUL_IEEE * T0.W, T2.W, literal.y, BS:VEC_120/SCL_212
; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
-; CM-NEXT: ADD T1.X, T0.Z, -T2.W,
-; CM-NEXT: MUL_IEEE T0.Y, PV.W, literal.x,
-; CM-NEXT: MAX_INT T0.Z, PV.Z, literal.y,
-; CM-NEXT: RNDNE * T1.W, PV.Y,
-; CM-NEXT: 967029397(3.122284e-04), -330(nan)
-; CM-NEXT: TRUNC T2.X, PV.W,
-; CM-NEXT: ADD_INT T2.Y, PV.Z, literal.x,
-; CM-NEXT: MULADD_IEEE T0.Z, T0.W, literal.y, PV.Y,
-; CM-NEXT: ADD * T0.W, PV.X, T0.X,
-; CM-NEXT: 204(2.858649e-43), 1069064192(1.442383e+00)
-; CM-NEXT: EXP_IEEE T0.X, T0.W,
-; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
-; CM-NEXT: ADD_INT T1.X, T1.Z, literal.x,
-; CM-NEXT: MULADD_IEEE T0.Y, T2.Z, literal.y, T0.Z, BS:VEC_102/SCL_221
-; CM-NEXT: ADD T0.Z, T1.Y, -T1.W,
-; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.z,
-; CM-NEXT: 102(1.429324e-43), 967029397(3.122284e-04)
-; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
-; CM-NEXT: SETGT_UINT T3.X, T1.Z, literal.x,
-; CM-NEXT: MUL_IEEE T1.Y, PV.W, literal.y,
-; CM-NEXT: SETGT_UINT T2.Z, T1.Z, literal.z,
-; CM-NEXT: ADD * T1.W, PV.Z, PV.Y,
-; CM-NEXT: -229(nan), 2130706432(1.701412e+38)
-; CM-NEXT: 254(3.559298e-43), 0(0.000000e+00)
+; CM-NEXT: TRUNC T1.X, T1.Z,
+; CM-NEXT: RNDNE T2.Y, PV.W,
+; CM-NEXT: MULADD_IEEE T0.Z, T1.Y, literal.x, PV.Z,
+; CM-NEXT: ADD * T1.W, PV.Y, PV.X,
+; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
+; CM-NEXT: EXP_IEEE T0.X, T1.W,
+; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W,
+; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W,
+; CM-NEXT: MULADD_IEEE T2.X, T2.W, literal.x, T0.Z,
+; CM-NEXT: ADD T0.Y, T0.W, -T2.Y, BS:VEC_120/SCL_212
+; CM-NEXT: FLT_TO_INT T0.Z, T1.X,
+; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.y,
+; CM-NEXT: 967029397(3.122284e-04), 209715200(1.972152e-31)
+; CM-NEXT: MUL_IEEE T1.X, PV.W, literal.x,
+; CM-NEXT: SETGT_UINT T1.Y, PV.Z, literal.y,
+; CM-NEXT: TRUNC T1.Z, T2.Y,
+; CM-NEXT: ADD * T1.W, PV.Y, PV.X,
+; CM-NEXT: 209715200(1.972152e-31), -229(nan)
; CM-NEXT: EXP_IEEE T0.X (MASKED), T1.W,
; CM-NEXT: EXP_IEEE T0.Y, T1.W,
; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W,
; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W,
-; CM-NEXT: CNDE_INT T4.X, T2.Z, T0.W, T1.Y,
-; CM-NEXT: CNDE_INT T1.Y, T3.X, T2.Y, T1.X,
-; CM-NEXT: FLT_TO_INT T0.Z, T2.X, BS:VEC_120/SCL_212
-; CM-NEXT: MUL_IEEE * T0.W, PV.Y, literal.x,
-; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
-; CM-NEXT: SETGT_INT T1.X, T1.Z, literal.x,
-; CM-NEXT: MUL_IEEE T2.Y, T0.X, literal.y,
-; CM-NEXT: MUL_IEEE T3.Z, PV.W, literal.z,
-; CM-NEXT: SETGT_UINT * T1.W, PV.Z, literal.w,
-; CM-NEXT: -127(nan), 209715200(1.972152e-31)
-; CM-NEXT: 2130706432(1.701412e+38), 254(3.559298e-43)
-; CM-NEXT: CNDE_INT T2.X, PV.W, T0.W, PV.Z,
+; CM-NEXT: FLT_TO_INT T2.X, T1.Z,
+; CM-NEXT: MUL_IEEE T2.Y, PV.Y, literal.x,
+; CM-NEXT: CNDE_INT T1.Z, T1.Y, T1.X, T0.W,
+; CM-NEXT: SETGT_INT * T0.W, T0.Z, literal.y, BS:VEC_120/SCL_212
+; CM-NEXT: 209715200(1.972152e-31), -127(nan)
+; CM-NEXT: CNDE_INT T1.X, PV.W, PV.Z, T0.X,
; CM-NEXT: MUL_IEEE T3.Y, PV.Y, literal.x,
-; CM-NEXT: CNDE_INT T3.Z, PV.X, T1.Y, T1.Z,
-; CM-NEXT: MAX_INT * T0.W, T0.Z, literal.y,
-; CM-NEXT: 209715200(1.972152e-31), -330(nan)
-; CM-NEXT: ADD_INT T5.X, PV.W, literal.x,
-; CM-NEXT: ADD_INT T1.Y, T0.Z, literal.y,
-; CM-NEXT: SETGT_UINT T4.Z, T0.Z, literal.z,
-; CM-NEXT: MUL_IEEE * T0.W, T0.Y, literal.w,
+; CM-NEXT: SETGT_UINT T1.Z, PV.X, literal.y,
+; CM-NEXT: MAX_INT * T1.W, T0.Z, literal.z,
+; CM-NEXT: 209715200(1.972152e-31), -229(nan)
+; CM-NEXT: -330(nan), 0(0.000000e+00)
+; CM-NEXT: ADD_INT T3.X, PV.W, literal.x,
+; CM-NEXT: ADD_INT T4.Y, T0.Z, literal.y,
+; CM-NEXT: CNDE_INT T2.Z, PV.Z, PV.Y, T2.Y,
+; CM-NEXT: SETGT_INT * T1.W, T2.X, literal.z,
; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43)
-; CM-NEXT: -229(nan), 209715200(1.972152e-31)
-; CM-NEXT: MUL_IEEE T6.X, PV.W, literal.x,
-; CM-NEXT: MIN_INT T4.Y, T0.Z, literal.y,
-; CM-NEXT: CNDE_INT T5.Z, PV.Z, PV.X, PV.Y,
-; CM-NEXT: SETGT_INT * T2.W, T0.Z, literal.z,
-; CM-NEXT: 209715200(1.972152e-31), 381(5.338947e-43)
-; CM-NEXT: -127(nan), 0(0.000000e+00)
-; CM-NEXT: CNDE_INT T5.X, PV.W, PV.Z, T0.Z,
-; CM-NEXT: MIN_INT T1.Y, T1.Z, literal.x,
-; CM-NEXT: ADD_INT T5.Z, PV.Y, literal.y,
-; CM-NEXT: ADD_INT * T3.W, T0.Z, literal.z, BS:VEC_120/SCL_212
-; CM-NEXT: 381(5.338947e-43), -254(nan)
; CM-NEXT: -127(nan), 0(0.000000e+00)
-; CM-NEXT: CNDE_INT T7.X, T1.W, PV.W, PV.Z,
-; CM-NEXT: SETGT_INT T4.Y, T0.Z, literal.x,
-; CM-NEXT: ADD_INT T0.Z, PV.Y, literal.y,
-; CM-NEXT: ADD_INT * T1.W, T1.Z, literal.z, BS:VEC_120/SCL_212
+; CM-NEXT: CNDE_INT T4.X, PV.W, PV.Z, T0.Y,
+; CM-NEXT: MUL_IEEE T2.Y, T0.X, literal.x,
+; CM-NEXT: MAX_INT T2.Z, T2.X, literal.y, BS:VEC_120/SCL_212
+; CM-NEXT: CNDE_INT * T2.W, T1.Y, PV.X, PV.Y,
+; CM-NEXT: 2130706432(1.701412e+38), -330(nan)
+; CM-NEXT: CNDE_INT T0.X, T0.W, PV.W, T0.Z,
+; CM-NEXT: ADD_INT T1.Y, PV.Z, literal.x,
+; CM-NEXT: ADD_INT T2.Z, T2.X, literal.y,
+; CM-NEXT: MIN_INT * T0.W, T2.X, literal.z,
+; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43)
+; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00)
+; CM-NEXT: ADD_INT T3.X, PV.W, literal.x,
+; CM-NEXT: ADD_INT T3.Y, T2.X, literal.y,
+; CM-NEXT: SETGT_UINT T3.Z, T2.X, literal.z,
+; CM-NEXT: CNDE_INT * T0.W, T1.Z, PV.Y, PV.Z,
+; CM-NEXT: -254(nan), -127(nan)
+; CM-NEXT: 254(3.559298e-43), 0(0.000000e+00)
+; CM-NEXT: MUL_IEEE T5.X, T0.Y, literal.x,
+; CM-NEXT: CNDE_INT T0.Y, T1.W, PV.W, T2.X,
+; CM-NEXT: CNDE_INT T1.Z, PV.Z, PV.Y, PV.X,
+; CM-NEXT: MIN_INT * T0.W, T0.Z, literal.y,
+; CM-NEXT: 2130706432(1.701412e+38), 381(5.338947e-43)
+; CM-NEXT: SETGT_INT T2.X, T2.X, literal.x,
+; CM-NEXT: ADD_INT T1.Y, PV.W, literal.y,
+; CM-NEXT: ADD_INT T2.Z, T0.Z, literal.z,
+; CM-NEXT: SETGT_UINT * T0.W, T0.Z, literal.w,
; CM-NEXT: 127(1.779649e-43), -254(nan)
-; CM-NEXT: -127(nan), 0(0.000000e+00)
-; CM-NEXT: CNDE_INT T8.X, T2.Z, PV.W, PV.Z,
-; CM-NEXT: SETGT_INT T1.Y, T1.Z, literal.x, BS:VEC_120/SCL_212
-; CM-NEXT: CNDE_INT T0.Z, PV.Y, T5.X, PV.X,
-; CM-NEXT: CNDE_INT * T0.W, T4.Z, T6.X, T0.W, BS:VEC_201
-; CM-NEXT: 127(1.779649e-43), 0(0.000000e+00)
-; CM-NEXT: CNDE_INT T5.X, T2.W, PV.W, T0.Y,
+; CM-NEXT: -127(nan), 254(3.559298e-43)
+; CM-NEXT: CNDE_INT T3.X, PV.W, PV.Z, PV.Y,
+; CM-NEXT: SETGT_INT T1.Y, T0.Z, literal.x,
+; CM-NEXT: CNDE_INT T0.Z, PV.X, T0.Y, T1.Z,
+; CM-NEXT: MUL_IEEE * T1.W, T5.X, literal.y,
+; CM-NEXT: 127(1.779649e-43), 2130706432(1.701412e+38)
+; CM-NEXT: CNDE_INT T5.X, T3.Z, T5.X, PV.W,
; CM-NEXT: LSHL T0.Y, PV.Z, literal.x,
-; CM-NEXT: CNDE_INT T0.Z, PV.Y, T3.Z, PV.X,
-; CM-NEXT: CNDE_INT * T0.W, T3.X, T3.Y, T2.Y, BS:VEC_201
-; CM-NEXT: 23(3.222986e-44), 0(0.000000e+00)
-; CM-NEXT: CNDE_INT T0.X, T1.X, PV.W, T0.X,
+; CM-NEXT: CNDE_INT T0.Z, PV.Y, T0.X, PV.X, BS:VEC_021/SCL_122
+; CM-NEXT: MUL_IEEE * T1.W, T2.Y, literal.y,
+; CM-NEXT: 23(3.222986e-44), 2130706432(1.701412e+38)
+; CM-NEXT: CNDE_INT T0.X, T0.W, T2.Y, PV.W,
; CM-NEXT: LSHL T2.Y, PV.Z, literal.x,
; CM-NEXT: ADD_INT * T0.Z, PV.Y, literal.y,
; CM-NEXT: 23(3.222986e-44), 1065353216(1.000000e+00)
-; CM-NEXT: ALU clause starting at 105:
-; CM-NEXT: CNDE_INT * T0.W, T4.Y, T5.X, T2.X,
-; CM-NEXT: MUL_IEEE T1.X, PV.W, T0.Z,
+; CM-NEXT: ALU clause starting at 103:
+; CM-NEXT: CNDE_INT * T0.W, T2.X, T4.X, T5.X,
+; CM-NEXT: MUL_IEEE T2.X, PV.W, T0.Z,
; CM-NEXT: SETGT T0.Y, literal.x, KC0[3].X,
; CM-NEXT: ADD_INT T0.Z, T2.Y, literal.y,
-; CM-NEXT: CNDE_INT * T0.W, T1.Y, T0.X, T4.X, BS:VEC_120/SCL_212
+; CM-NEXT: CNDE_INT * T0.W, T1.Y, T1.X, T0.X, BS:VEC_120/SCL_212
; CM-NEXT: -1026650416(-1.032789e+02), 1065353216(1.000000e+00)
; CM-NEXT: MUL_IEEE T0.X, PV.W, PV.Z,
; CM-NEXT: SETGT T1.Y, literal.x, KC0[2].W,
@@ -1215,8 +1211,8 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
;
; R600-LABEL: s_exp_v3f32:
; R600: ; %bb.0:
-; R600-NEXT: ALU 100, @6, KC0[CB0:0-32], KC1[]
-; R600-NEXT: ALU 69, @107, KC0[CB0:0-32], KC1[]
+; R600-NEXT: ALU 99, @6, KC0[CB0:0-32], KC1[]
+; R600-NEXT: ALU 69, @106, KC0[CB0:0-32], KC1[]
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; R600-NEXT: CF_END
@@ -1224,69 +1220,68 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
; R600-NEXT: ALU clause starting at 6:
; R600-NEXT: AND_INT * T0.W, KC0[3].Y, literal.x,
; R600-NEXT: -4096(nan), 0(0.000000e+00)
-; R600-NEXT: ADD T1.W, KC0[3].Y, -PV.W,
-; R600-NEXT: MUL_IEEE * T2.W, PV.W, literal.x,
+; R600-NEXT: MUL_IEEE T1.W, PV.W, literal.x,
+; R600-NEXT: ADD * T2.W, KC0[3].Y, -PV.W,
; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
-; R600-NEXT: RNDNE T3.W, PS,
-; R600-NEXT: MUL_IEEE * T4.W, PV.W, literal.x,
+; R600-NEXT: RNDNE * T3.W, PV.W,
+; R600-NEXT: TRUNC T4.W, PV.W,
+; R600-NEXT: MUL_IEEE * T5.W, T2.W, literal.x,
; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
-; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PS,
-; R600-NEXT: TRUNC * T4.W, PV.W,
+; R600-NEXT: MULADD_IEEE T2.W, T2.W, literal.x, PS,
+; R600-NEXT: FLT_TO_INT * T4.W, PV.W,
; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
-; R600-NEXT: FLT_TO_INT T0.Z, PS,
-; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.x, PV.W,
-; R600-NEXT: ADD * T1.W, T2.W, -T3.W,
-; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
-; R600-NEXT: ADD T0.W, PS, PV.W,
-; R600-NEXT: MAX_INT * T1.W, PV.Z, literal.x,
-; R600-NEXT: -330(nan), 0(0.000000e+00)
-; R600-NEXT: ADD_INT T0.Y, PS, literal.x,
-; R600-NEXT: ADD_INT T1.Z, T0.Z, literal.y,
-; R600-NEXT: SETGT_UINT T1.W, T0.Z, literal.z,
-; R600-NEXT: EXP_IEEE * T0.X, PV.W,
+; R600-NEXT: MAX_INT T0.Z, PS, literal.x,
+; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.y, PV.W,
+; R600-NEXT: ADD * T1.W, T1.W, -T3.W,
+; R600-NEXT: -330(nan), 967029397(3.122284e-04)
+; R600-NEXT: ADD T0.Y, PS, PV.W,
+; R600-NEXT: ADD_INT T0.Z, PV.Z, literal.x,
+; R600-NEXT: ADD_INT T0.W, T4.W, literal.y,
+; R600-NEXT: SETGT_UINT * T1.W, T4.W, literal.z,
; R600-NEXT: 204(2.858649e-43), 102(1.429324e-43)
; R600-NEXT: -229(nan), 0(0.000000e+00)
-; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
-; R600-NEXT: SETGT_INT T0.W, T0.Z, literal.x,
-; R600-NEXT: MUL_IEEE * T2.W, PS, literal.y,
-; R600-NEXT: -127(nan), 209715200(1.972152e-31)
-; R600-NEXT: MUL_IEEE T0.Y, PS, literal.x,
-; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Z, T0.Z,
-; R600-NEXT: MIN_INT T3.W, T0.Z, literal.y,
-; R600-NEXT: AND_INT * T4.W, KC0[3].W, literal.z,
-; R600-NEXT: 209715200(1.972152e-31), 381(5.338947e-43)
-; R600-NEXT: -4096(nan), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T1.X, T0.X, literal.x,
-; R600-NEXT: ADD T1.Y, KC0[3].W, -PS,
-; R600-NEXT: ADD_INT T2.Z, PV.W, literal.y,
-; R600-NEXT: ADD_INT T3.W, T0.Z, literal.z,
-; R600-NEXT: SETGT_UINT * T5.W, T0.Z, literal.w,
-; R600-NEXT: 2130706432(1.701412e+38), -254(nan)
+; R600-NEXT: CNDE_INT T0.Z, PS, PV.Z, PV.W,
+; R600-NEXT: SETGT_INT T0.W, T4.W, literal.x,
+; R600-NEXT: EXP_IEEE * T0.X, PV.Y,
+; R600-NEXT: -127(nan), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T1.X, PS, literal.x,
+; R600-NEXT: CNDE_INT T0.Y, PV.W, PV.Z, T4.W,
+; R600-NEXT: MIN_INT T0.Z, T4.W, literal.y,
+; R600-NEXT: AND_INT T2.W, KC0[3].W, literal.z,
+; R600-NEXT: MUL_IEEE * T3.W, PS, literal.w,
+; R600-NEXT: 2130706432(1.701412e+38), 381(5.338947e-43)
+; R600-NEXT: -4096(nan), 209715200(1.972152e-31)
+; R600-NEXT: MUL_IEEE T2.X, PS, literal.x,
+; R600-NEXT: ADD T1.Y, KC0[3].W, -PV.W,
+; R600-NEXT: ADD_INT T0.Z, PV.Z, literal.y,
+; R600-NEXT: ADD_INT T5.W, T4.W, literal.z,
+; R600-NEXT: SETGT_UINT * T6.W, T4.W, literal.w,
+; R600-NEXT: 209715200(1.972152e-31), -254(nan)
; R600-NEXT: -127(nan), 254(3.559298e-43)
-; R600-NEXT: CNDE_INT T2.X, PS, PV.W, PV.Z,
-; R600-NEXT: SETGT_INT T2.Y, T0.Z, literal.x,
+; R600-NEXT: CNDE_INT T3.X, PS, PV.W, PV.Z,
+; R600-NEXT: SETGT_INT T2.Y, T4.W, literal.x,
; R600-NEXT: MUL_IEEE T0.Z, PV.Y, literal.y,
-; R600-NEXT: MUL_IEEE T3.W, T4.W, literal.z,
-; R600-NEXT: MUL_IEEE * T6.W, PV.X, literal.w,
+; R600-NEXT: MUL_IEEE * T4.W, T2.W, literal.z, BS:VEC_120/SCL_212
; R600-NEXT: 127(1.779649e-43), 967029397(3.122284e-04)
-; R600-NEXT: 1069064192(1.442383e+00), 2130706432(1.701412e+38)
-; R600-NEXT: CNDE_INT T1.X, T5.W, T1.X, PS, BS:VEC_120/SCL_212
-; R600-NEXT: RNDNE T3.Y, PV.W,
-; R600-NEXT: MULADD_IEEE T0.Z, T1.Y, literal.x, PV.Z,
-; R600-NEXT: CNDE_INT T5.W, PV.Y, T1.Z, PV.X,
-; R600-NEXT: CNDE_INT * T1.W, T1.W, T0.Y, T2.W,
; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
-; R600-NEXT: CNDE_INT T0.X, T0.W, PS, T0.X,
+; R600-NEXT: CNDE_INT * T1.W, T1.W, T2.X, T3.W,
+; R600-NEXT: CNDE_INT T0.X, T0.W, PV.W, T0.X, BS:VEC_021/SCL_122
+; R600-NEXT: RNDNE T3.Y, T4.W, BS:VEC_120/SCL_212
+; R600-NEXT: MULADD_IEEE T0.Z, T1.Y, literal.x, T0.Z,
+; R600-NEXT: CNDE_INT T0.W, T2.Y, T0.Y, T3.X, BS:VEC_120/SCL_212
+; R600-NEXT: MUL_IEEE * T1.W, T1.X, literal.y,
+; R600-NEXT: 1069064192(1.442383e+00), 2130706432(1.701412e+38)
+; R600-NEXT: CNDE_INT T1.X, T6.W, T1.X, PS,
; R600-NEXT: LSHL T0.Y, PV.W, literal.x,
; R600-NEXT: AND_INT T1.Z, KC0[3].Z, literal.y,
-; R600-NEXT: MULADD_IEEE T0.W, T4.W, literal.z, PV.Z, BS:VEC_120/SCL_212
-; R600-NEXT: ADD * T1.W, T3.W, -PV.Y,
+; R600-NEXT: MULADD_IEEE T0.W, T2.W, literal.z, PV.Z, BS:VEC_120/SCL_212
+; R600-NEXT: ADD * T1.W, T4.W, -PV.Y,
; R600-NEXT: 23(3.222986e-44), -4096(nan)
; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
; R600-NEXT: ADD T1.Y, PS, PV.W,
; R600-NEXT: MUL_IEEE T0.Z, PV.Z, literal.x,
; R600-NEXT: ADD_INT T0.W, PV.Y, literal.y,
-; R600-NEXT: CNDE_INT * T1.W, T2.Y, PV.X, T1.X,
+; R600-NEXT: CNDE_INT * T1.W, T2.Y, T0.X, PV.X,
; R600-NEXT: 1069064192(1.442383e+00), 1065353216(1.000000e+00)
; R600-NEXT: MUL_IEEE T0.X, PS, PV.W,
; R600-NEXT: ADD T0.Y, KC0[3].Z, -T1.Z,
@@ -1300,12 +1295,12 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
; R600-NEXT: MUL_IEEE * T1.W, PS, literal.z,
; R600-NEXT: -1026650416(-1.032789e+02), 967029397(3.122284e-04)
; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T3.X, T1.X, literal.x,
-; R600-NEXT: MUL_IEEE T2.Y, PS, literal.y,
+; R600-NEXT: MUL_IEEE T3.X, PS, literal.x,
+; R600-NEXT: MUL_IEEE T2.Y, T1.X, literal.y,
; R600-NEXT: MULADD_IEEE T4.Z, T0.Y, literal.z, PV.W,
; R600-NEXT: FLT_TO_INT T0.W, PV.Z,
; R600-NEXT: MIN_INT * T2.W, PV.Y, literal.w,
-; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31)
+; R600-NEXT: 209715200(1.972152e-31), 2130706432(1.701412e+38)
; R600-NEXT: 1069064192(1.442383e+00), 381(5.338947e-43)
; R600-NEXT: ADD_INT T4.X, PS, literal.x,
; R600-NEXT: MAX_INT T0.Y, PV.W, literal.y,
@@ -1323,7 +1318,7 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
; R600-NEXT: 102(1.429324e-43), -229(nan)
; R600-NEXT: ADD_INT * T6.X, T0.W, literal.x,
; R600-NEXT: -127(nan), 0(0.000000e+00)
-; R600-NEXT: ALU clause starting at 107:
+; R600-NEXT: ALU clause starting at 106:
; R600-NEXT: SETGT_UINT T0.Y, T0.W, literal.x,
; R600-NEXT: CNDE_INT T0.Z, T3.W, T0.Z, T2.W, BS:VEC_102/SCL_221
; R600-NEXT: SETGT_INT T2.W, T0.W, literal.y,
@@ -1339,25 +1334,25 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
; R600-NEXT: SETGT_UINT T5.X, T1.Y, literal.x,
; R600-NEXT: CNDE_INT T4.Y, PS, PV.Z, PV.W,
; R600-NEXT: MAX_INT T0.Z, T1.Y, literal.y,
-; R600-NEXT: MUL_IEEE T4.W, T1.Z, literal.z,
-; R600-NEXT: MUL_IEEE * T5.W, PV.Y, literal.w,
+; R600-NEXT: MUL_IEEE T4.W, PV.Y, literal.z,
+; R600-NEXT: MUL_IEEE * T5.W, T1.Z, literal.w,
; R600-NEXT: 254(3.559298e-43), -330(nan)
-; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31)
-; R600-NEXT: CNDE_INT T6.X, T3.W, PS, T3.Y, BS:VEC_021/SCL_122
-; R600-NEXT: MUL_IEEE T3.Y, PV.W, literal.x,
+; R600-NEXT: 209715200(1.972152e-31), 2130706432(1.701412e+38)
+; R600-NEXT: MUL_IEEE T6.X, PS, literal.x,
+; R600-NEXT: CNDE_INT T3.Y, T3.W, PV.W, T3.Y, BS:VEC_021/SCL_122
; R600-NEXT: ADD_INT T0.Z, PV.Z, literal.y,
; R600-NEXT: ADD_INT T3.W, T1.Y, literal.z,
-; R600-NEXT: SETGT_UINT * T5.W, T1.Y, literal.w,
+; R600-NEXT: SETGT_UINT * T4.W, T1.Y, literal.w,
; R600-NEXT: 2130706432(1.701412e+38), 204(2.858649e-43)
; R600-NEXT: 102(1.429324e-43), -229(nan)
; R600-NEXT: CNDE_INT T8.X, PS, PV.Z, PV.W,
; R600-NEXT: SETGT_INT T5.Y, T1.Y, literal.x,
-; R600-NEXT: CNDE_INT T0.Z, T0.Y, T4.W, PV.Y, BS:VEC_120/SCL_212
-; R600-NEXT: CNDE_INT T2.W, T2.W, PV.X, T1.Z,
+; R600-NEXT: CNDE_INT T0.Z, T2.W, PV.Y, T1.Z,
+; R600-NEXT: CNDE_INT T2.W, T0.Y, T5.W, PV.X, BS:VEC_120/SCL_212
; R600-NEXT: LSHL * T3.W, T4.Y, literal.y,
; R600-NEXT: -127(nan), 23(3.222986e-44)
; R600-NEXT: ADD_INT T6.X, PS, literal.x,
-; R600-NEXT: CNDE_INT T0.Y, T0.W, PV.W, PV.Z,
+; R600-NEXT: CNDE_INT T0.Y, T0.W, PV.Z, PV.W,
; R600-NEXT: CNDE_INT T0.Z, PV.Y, PV.X, T1.Y,
; R600-NEXT: CNDE_INT T0.W, T5.X, T7.X, T4.X,
; R600-NEXT: SETGT_INT * T2.W, T1.Y, literal.y,
@@ -1365,18 +1360,18 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
; R600-NEXT: CNDE_INT T4.X, PS, PV.Z, PV.W,
; R600-NEXT: MUL_IEEE T0.Y, PV.Y, PV.X,
; R600-NEXT: SETGT T0.Z, literal.x, KC0[3].Z,
-; R600-NEXT: CNDE_INT T0.W, T5.W, T2.Y, T1.W,
-; R600-NEXT: MUL_IEEE * T1.W, T3.X, literal.y,
+; R600-NEXT: MUL_IEEE T0.W, T2.Y, literal.y,
+; R600-NEXT: CNDE_INT * T1.W, T4.W, T3.X, T1.W,
; R600-NEXT: -1026650416(-1.032789e+02), 2130706432(1.701412e+38)
-; R600-NEXT: CNDE_INT T3.X, T5.X, T3.X, PS,
-; R600-NEXT: CNDE_INT T1.Y, T5.Y, PV.W, T1.X,
+; R600-NEXT: CNDE_INT T1.X, T5.Y, PS, T1.X,
+; R600-NEXT: CNDE_INT T1.Y, T5.X, T2.Y, PV.W,
; R600-NEXT: CNDE T0.Z, PV.Z, PV.Y, 0.0,
; R600-NEXT: SETGT T0.W, KC0[3].Z, literal.x,
; R600-NEXT: LSHL * T1.W, PV.X, literal.y,
; R600-NEXT: 1118925336(8.872284e+01), 23(3.222986e-44)
-; R600-NEXT: ADD_INT T1.X, PS, literal.x,
+; R600-NEXT: ADD_INT T3.X, PS, literal.x,
; R600-NEXT: CNDE T0.Y, PV.W, PV.Z, literal.y,
-; R600-NEXT: CNDE_INT T0.Z, T2.W, PV.Y, PV.X,
+; R600-NEXT: CNDE_INT T0.Z, T2.W, PV.X, PV.Y,
; R600-NEXT: CNDE T0.W, T2.X, T0.X, 0.0,
; R600-NEXT: SETGT * T1.W, KC0[3].Y, literal.z,
; R600-NEXT: 1065353216(1.000000e+00), 2139095040(INF)
@@ -1397,197 +1392,193 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
;
; CM-LABEL: s_exp_v3f32:
; CM: ; %bb.0:
-; CM-NEXT: ALU 102, @6, KC0[CB0:0-32], KC1[]
-; CM-NEXT: ALU 80, @109, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T3.X
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2.X, T0.X
+; CM-NEXT: ALU 101, @6, KC0[CB0:0-32], KC1[]
+; CM-NEXT: ALU 77, @108, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2.X, T3.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: ALU clause starting at 6:
; CM-NEXT: AND_INT * T0.W, KC0[3].Y, literal.x,
; CM-NEXT: -4096(nan), 0(0.000000e+00)
-; CM-NEXT: MUL_IEEE T0.Z, PV.W, literal.x,
; CM-NEXT: ADD * T1.W, KC0[3].Y, -PV.W,
-; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
-; CM-NEXT: MUL_IEEE T1.Z, PV.W, literal.x,
-; CM-NEXT: RNDNE * T2.W, PV.Z,
-; CM-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
-; CM-NEXT: TRUNC T2.Z, PV.W,
+; CM-NEXT: MUL_IEEE T0.Z, PV.W, literal.x,
+; CM-NEXT: MUL_IEEE * T2.W, T0.W, literal.y,
+; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
+; CM-NEXT: RNDNE T1.Z, PV.W,
; CM-NEXT: MULADD_IEEE * T1.W, T1.W, literal.x, PV.Z,
; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
-; CM-NEXT: MULADD_IEEE T0.Y, T0.W, literal.x, PV.W,
-; CM-NEXT: ADD T0.Z, T0.Z, -T2.W,
-; CM-NEXT: FLT_TO_INT * T0.W, PV.Z,
+; CM-NEXT: MULADD_IEEE T0.Z, T0.W, literal.x, PV.W,
+; CM-NEXT: ADD * T0.W, T2.W, -PV.Z, BS:VEC_120/SCL_212
; CM-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
-; CM-NEXT: MIN_INT T1.Z, PV.W, literal.x,
-; CM-NEXT: ADD * T1.W, PV.Z, PV.Y,
+; CM-NEXT: TRUNC T1.Z, T1.Z,
+; CM-NEXT: ADD * T0.W, PV.W, PV.Z,
+; CM-NEXT: EXP_IEEE T0.X, T0.W,
+; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
+; CM-NEXT: FLT_TO_INT T0.Z, T1.Z,
+; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.x,
+; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
+; CM-NEXT: MUL_IEEE T0.Y, PV.W, literal.x,
+; CM-NEXT: MAX_INT T1.Z, PV.Z, literal.y,
+; CM-NEXT: MIN_INT * T1.W, PV.Z, literal.z,
+; CM-NEXT: 209715200(1.972152e-31), -330(nan)
; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00)
-; CM-NEXT: EXP_IEEE T0.X, T1.W,
-; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W,
-; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W,
-; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W,
-; CM-NEXT: MUL_IEEE T0.Y, PV.X, literal.x,
-; CM-NEXT: ADD_INT T0.Z, T1.Z, literal.y,
-; CM-NEXT: MAX_INT * T1.W, T0.W, literal.z,
-; CM-NEXT: 2130706432(1.701412e+38), -254(nan)
-; CM-NEXT: -330(nan), 0(0.000000e+00)
-; CM-NEXT: ADD_INT T1.X, T0.W, literal.x,
-; CM-NEXT: ADD_INT T1.Y, PV.W, literal.y,
-; CM-NEXT: ADD_INT T1.Z, T0.W, literal.z,
-; CM-NEXT: SETGT_UINT * T1.W, T0.W, literal.w,
-; CM-NEXT: -127(nan), 204(2.858649e-43)
+; CM-NEXT: ADD_INT T1.X, PV.W, literal.x,
+; CM-NEXT: ADD_INT T1.Y, PV.Z, literal.y,
+; CM-NEXT: ADD_INT T1.Z, T0.Z, literal.z,
+; CM-NEXT: SETGT_UINT * T1.W, T0.Z, literal.w,
+; CM-NEXT: -254(nan), 204(2.858649e-43)
; CM-NEXT: 102(1.429324e-43), -229(nan)
-; CM-NEXT: SETGT_UINT T2.X, T0.W, literal.x,
-; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
-; CM-NEXT: SETGT_INT T1.Z, T0.W, literal.y,
-; CM-NEXT: MUL_IEEE * T2.W, T0.X, literal.z,
-; CM-NEXT: 254(3.559298e-43), -127(nan)
-; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
-; CM-NEXT: MUL_IEEE T3.X, PV.W, literal.x,
-; CM-NEXT: CNDE_INT T1.Y, PV.Z, PV.Y, T0.W,
-; CM-NEXT: CNDE_INT T0.Z, PV.X, T1.X, T0.Z,
-; CM-NEXT: SETGT_INT * T0.W, T0.W, literal.y,
-; CM-NEXT: 209715200(1.972152e-31), 127(1.779649e-43)
+; CM-NEXT: ADD_INT T2.X, T0.Z, literal.x,
+; CM-NEXT: SETGT_UINT T2.Y, T0.Z, literal.y,
+; CM-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
+; CM-NEXT: SETGT_INT * T2.W, T0.Z, literal.x,
+; CM-NEXT: -127(nan), 254(3.559298e-43)
+; CM-NEXT: MUL_IEEE T3.X, T0.X, literal.x,
+; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Z, T0.Z,
+; CM-NEXT: CNDE_INT T1.Z, PV.Y, PV.X, T1.X,
+; CM-NEXT: SETGT_INT * T3.W, T0.Z, literal.y,
+; CM-NEXT: 2130706432(1.701412e+38), 127(1.779649e-43)
; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
-; CM-NEXT: CNDE_INT T0.Z, T1.W, PV.X, T2.W,
-; CM-NEXT: MUL_IEEE * T1.W, T0.Y, literal.x,
+; CM-NEXT: MUL_IEEE T0.Z, PV.X, literal.x,
+; CM-NEXT: CNDE_INT * T0.W, T1.W, T0.Y, T0.W,
; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
-; CM-NEXT: CNDE_INT T1.X, T2.X, T0.Y, PV.W,
-; CM-NEXT: CNDE_INT T0.Y, T1.Z, PV.Z, T0.X,
+; CM-NEXT: CNDE_INT T0.X, T2.W, PV.W, T0.X,
+; CM-NEXT: CNDE_INT T0.Y, T2.Y, T3.X, PV.Z,
; CM-NEXT: LSHL T0.Z, PV.Y, literal.x,
-; CM-NEXT: AND_INT * T1.W, KC0[3].Z, literal.y,
+; CM-NEXT: AND_INT * T0.W, KC0[3].Z, literal.y,
; CM-NEXT: 23(3.222986e-44), -4096(nan)
-; CM-NEXT: MUL_IEEE T0.X, PV.W, literal.x,
; CM-NEXT: ADD T1.Y, KC0[3].Z, -PV.W,
-; CM-NEXT: ADD_INT T0.Z, PV.Z, literal.y,
-; CM-NEXT: CNDE_INT * T0.W, T0.W, PV.Y, PV.X,
-; CM-NEXT: 1069064192(1.442383e+00), 1065353216(1.000000e+00)
-; CM-NEXT: MUL_IEEE T0.Y, PV.W, PV.Z,
-; CM-NEXT: MUL_IEEE T0.Z, PV.Y, literal.x,
-; CM-NEXT: RNDNE * T0.W, PV.X,
-; CM-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
+; CM-NEXT: ADD_INT T0.Z, PV.Z, literal.x,
+; CM-NEXT: CNDE_INT * T1.W, T3.W, PV.X, PV.Y,
+; CM-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00)
+; CM-NEXT: MUL_IEEE T0.X, PV.W, PV.Z,
+; CM-NEXT: MUL_IEEE T0.Y, PV.Y, literal.x,
+; CM-NEXT: MUL_IEEE T0.Z, T0.W, literal.y,
+; CM-NEXT: AND_INT * T1.W, KC0[3].W, literal.z,
+; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
+; CM-NEXT: -4096(nan), 0(0.000000e+00)
; CM-NEXT: SETGT T1.X, literal.x, KC0[3].Y,
-; CM-NEXT: TRUNC T2.Y, PV.W,
-; CM-NEXT: AND_INT T1.Z, KC0[3].W, literal.y,
-; CM-NEXT: MULADD_IEEE * T2.W, T1.Y, literal.z, PV.Z,
-; CM-NEXT: -1026650416(-1.032789e+02), -4096(nan)
-; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
-; CM-NEXT: MULADD_IEEE T2.X, T1.W, literal.x, PV.W,
-; CM-NEXT: MUL_IEEE T1.Y, PV.Z, literal.y,
-; CM-NEXT: FLT_TO_INT T0.Z, PV.Y,
-; CM-NEXT: ADD * T1.W, KC0[3].W, -PV.Z,
+; CM-NEXT: ADD T2.Y, KC0[3].W, -PV.W,
+; CM-NEXT: RNDNE T1.Z, PV.Z,
+; CM-NEXT: MULADD_IEEE * T2.W, T1.Y, literal.y, PV.Y,
+; CM-NEXT: -1026650416(-1.032789e+02), 1069064192(1.442383e+00)
+; CM-NEXT: MULADD_IEEE T2.X, T0.W, literal.x, PV.W,
+; CM-NEXT: ADD T0.Y, T0.Z, -PV.Z,
+; CM-NEXT: MUL_IEEE T0.Z, PV.Y, literal.x,
+; CM-NEXT: MUL_IEEE * T0.W, T1.W, literal.y, BS:VEC_120/SCL_212
; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
-; CM-NEXT: ADD T0.X, T0.X, -T0.W,
-; CM-NEXT: MUL_IEEE T2.Y, PV.W, literal.x,
-; CM-NEXT: MAX_INT T2.Z, PV.Z, literal.y,
-; CM-NEXT: RNDNE * T0.W, PV.Y,
-; CM-NEXT: 967029397(3.122284e-04), -330(nan)
-; CM-NEXT: TRUNC T3.X, PV.W,
-; CM-NEXT: ADD_INT T3.Y, PV.Z, literal.x,
-; CM-NEXT: MULADD_IEEE T2.Z, T1.W, literal.y, PV.Y,
-; CM-NEXT: ADD * T1.W, PV.X, T2.X,
-; CM-NEXT: 204(2.858649e-43), 1069064192(1.442383e+00)
-; CM-NEXT: EXP_IEEE T0.X, T1.W,
-; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W,
-; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W,
-; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W,
-; CM-NEXT: ADD_INT T2.X, T0.Z, literal.x,
-; CM-NEXT: MULADD_IEEE T2.Y, T1.Z, literal.y, T2.Z, BS:VEC_102/SCL_221
-; CM-NEXT: ADD T1.Z, T1.Y, -T0.W,
-; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.z,
-; CM-NEXT: 102(1.429324e-43), 967029397(3.122284e-04)
-; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
-; CM-NEXT: SETGT_UINT T4.X, T0.Z, literal.x,
-; CM-NEXT: MUL_IEEE T1.Y, PV.W, literal.y,
-; CM-NEXT: SETGT_UINT T2.Z, T0.Z, literal.z,
-; CM-NEXT: ADD * T1.W, PV.Z, PV.Y,
-; CM-NEXT: -229(nan), 2130706432(1.701412e+38)
-; CM-NEXT: 254(3.559298e-43), 0(0.000000e+00)
+; CM-NEXT: TRUNC T3.X, T1.Z,
+; CM-NEXT: RNDNE T1.Y, PV.W,
+; CM-NEXT: MULADD_IEEE T0.Z, T2.Y, literal.x, PV.Z,
+; CM-NEXT: ADD * T2.W, PV.Y, PV.X,
+; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
+; CM-NEXT: EXP_IEEE T0.X (MASKED), T2.W,
+; CM-NEXT: EXP_IEEE T0.Y, T2.W,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T2.W,
+; CM-NEXT: EXP_IEEE * T0.W (MASKED), T2.W,
+; CM-NEXT: MULADD_IEEE T2.X, T1.W, literal.x, T0.Z,
+; CM-NEXT: ADD T2.Y, T0.W, -T1.Y, BS:VEC_120/SCL_212
+; CM-NEXT: FLT_TO_INT T0.Z, T3.X,
+; CM-NEXT: MUL_IEEE * T0.W, PV.Y, literal.y,
+; CM-NEXT: 967029397(3.122284e-04), 209715200(1.972152e-31)
+; CM-NEXT: MUL_IEEE T3.X, PV.W, literal.x,
+; CM-NEXT: SETGT_UINT T3.Y, PV.Z, literal.y,
+; CM-NEXT: TRUNC T1.Z, T1.Y,
+; CM-NEXT: ADD * T1.W, PV.Y, PV.X,
+; CM-NEXT: 209715200(1.972152e-31), -229(nan)
; CM-NEXT: EXP_IEEE T1.X (MASKED), T1.W,
-; CM-NEXT: EXP_IEEE T1.Y (MASKED), T1.W,
-; CM-NEXT: EXP_IEEE T1.Z, T1.W,
+; CM-NEXT: EXP_IEEE T1.Y, T1.W,
+; CM-NEXT: EXP_IEEE T1.Z (MASKED), T1.W,
; CM-NEXT: EXP_IEEE * T1.W (MASKED), T1.W,
-; CM-NEXT: ALU clause starting at 109:
-; CM-NEXT: CNDE_INT T5.X, T2.Z, T0.W, T1.Y,
-; CM-NEXT: CNDE_INT T1.Y, T4.X, T3.Y, T2.X,
-; CM-NEXT: FLT_TO_INT T3.Z, T3.X, BS:VEC_120/SCL_212
-; CM-NEXT: MUL_IEEE * T0.W, T1.Z, literal.x, BS:VEC_120/SCL_212
-; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
-; CM-NEXT: SETGT_INT T2.X, T0.Z, literal.x,
-; CM-NEXT: MUL_IEEE T2.Y, T0.X, literal.y,
-; CM-NEXT: MUL_IEEE T4.Z, PV.W, literal.z,
-; CM-NEXT: SETGT_UINT * T1.W, PV.Z, literal.w,
-; CM-NEXT: -127(nan), 209715200(1.972152e-31)
-; CM-NEXT: 2130706432(1.701412e+38), 254(3.559298e-43)
-; CM-NEXT: CNDE_INT T3.X, PV.W, T0.W, PV.Z,
-; CM-NEXT: MUL_IEEE T3.Y, PV.Y, literal.x,
-; CM-NEXT: CNDE_INT T4.Z, PV.X, T1.Y, T0.Z,
-; CM-NEXT: MAX_INT * T0.W, T3.Z, literal.y,
-; CM-NEXT: 209715200(1.972152e-31), -330(nan)
-; CM-NEXT: ADD_INT T6.X, PV.W, literal.x,
-; CM-NEXT: ADD_INT T1.Y, T3.Z, literal.y,
-; CM-NEXT: SETGT_UINT T5.Z, T3.Z, literal.z,
-; CM-NEXT: MUL_IEEE * T0.W, T1.Z, literal.w, BS:VEC_120/SCL_212
+; CM-NEXT: FLT_TO_INT T2.X, T1.Z,
+; CM-NEXT: MUL_IEEE T2.Y, PV.Y, literal.x,
+; CM-NEXT: CNDE_INT T1.Z, T3.Y, T3.X, T0.W,
+; CM-NEXT: SETGT_INT * T0.W, T0.Z, literal.y, BS:VEC_120/SCL_212
+; CM-NEXT: 209715200(1.972152e-31), -127(nan)
+; CM-NEXT: CNDE_INT T3.X, PV.W, PV.Z, T0.Y,
+; CM-NEXT: MUL_IEEE * T4.Y, PV.Y, literal.x,
+; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
+; CM-NEXT: ALU clause starting at 108:
+; CM-NEXT: SETGT_UINT T1.Z, T2.X, literal.x,
+; CM-NEXT: MAX_INT * T1.W, T0.Z, literal.y,
+; CM-NEXT: -229(nan), -330(nan)
+; CM-NEXT: ADD_INT T4.X, PV.W, literal.x,
+; CM-NEXT: ADD_INT T5.Y, T0.Z, literal.y,
+; CM-NEXT: CNDE_INT T2.Z, PV.Z, T4.Y, T2.Y,
+; CM-NEXT: SETGT_INT * T1.W, T2.X, literal.z,
; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43)
-; CM-NEXT: -229(nan), 209715200(1.972152e-31)
-; CM-NEXT: MUL_IEEE T7.X, PV.W, literal.x,
-; CM-NEXT: MIN_INT T4.Y, T3.Z, literal.y,
-; CM-NEXT: CNDE_INT T6.Z, PV.Z, PV.X, PV.Y,
-; CM-NEXT: SETGT_INT * T2.W, T3.Z, literal.z,
-; CM-NEXT: 209715200(1.972152e-31), 381(5.338947e-43)
; CM-NEXT: -127(nan), 0(0.000000e+00)
-; CM-NEXT: CNDE_INT T6.X, PV.W, PV.Z, T3.Z,
-; CM-NEXT: MIN_INT T1.Y, T0.Z, literal.x,
-; CM-NEXT: ADD_INT T6.Z, PV.Y, literal.y,
-; CM-NEXT: ADD_INT * T3.W, T3.Z, literal.z, BS:VEC_120/SCL_212
-; CM-NEXT: 381(5.338947e-43), -254(nan)
-; CM-NEXT: -127(nan), 0(0.000000e+00)
-; CM-NEXT: CNDE_INT T8.X, T1.W, PV.W, PV.Z,
-; CM-NEXT: SETGT_INT T4.Y, T3.Z, literal.x,
-; CM-NEXT: ADD_INT T3.Z, PV.Y, literal.y,
-; CM-NEXT: ADD_INT * T1.W, T0.Z, literal.z, BS:VEC_120/SCL_212
+; CM-NEXT: CNDE_INT T5.X, PV.W, PV.Z, T1.Y,
+; CM-NEXT: MUL_IEEE T0.Y, T0.Y, literal.x,
+; CM-NEXT: MAX_INT T2.Z, T2.X, literal.y,
+; CM-NEXT: CNDE_INT * T2.W, T3.Y, PV.X, PV.Y, BS:VEC_120/SCL_212
+; CM-NEXT: 2130706432(1.701412e+38), -330(nan)
+; CM-NEXT: CNDE_INT T4.X, T0.W, PV.W, T0.Z,
+; CM-NEXT: ADD_INT T2.Y, PV.Z, literal.x,
+; CM-NEXT: ADD_INT T2.Z, T2.X, literal.y,
+; CM-NEXT: MIN_INT * T0.W, T2.X, literal.z,
+; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43)
+; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00)
+; CM-NEXT: ADD_INT T6.X, PV.W, literal.x,
+; CM-NEXT: ADD_INT T3.Y, T2.X, literal.y,
+; CM-NEXT: SETGT_UINT T3.Z, T2.X, literal.z,
+; CM-NEXT: CNDE_INT * T0.W, T1.Z, PV.Y, PV.Z,
+; CM-NEXT: -254(nan), -127(nan)
+; CM-NEXT: 254(3.559298e-43), 0(0.000000e+00)
+; CM-NEXT: MUL_IEEE T7.X, T1.Y, literal.x,
+; CM-NEXT: CNDE_INT T1.Y, T1.W, PV.W, T2.X,
+; CM-NEXT: CNDE_INT T1.Z, PV.Z, PV.Y, PV.X,
+; CM-NEXT: MIN_INT * T0.W, T0.Z, literal.y,
+; CM-NEXT: 2130706432(1.701412e+38), 381(5.338947e-43)
+; CM-NEXT: SETGT_INT T2.X, T2.X, literal.x,
+; CM-NEXT: ADD_INT T2.Y, PV.W, literal.y,
+; CM-NEXT: ADD_INT T2.Z, T0.Z, literal.z,
+; CM-NEXT: SETGT_UINT * T0.W, T0.Z, literal.w,
; CM-NEXT: 127(1.779649e-43), -254(nan)
-; CM-NEXT: -127(nan), 0(0.000000e+00)
-; CM-NEXT: CNDE_INT T9.X, T2.Z, PV.W, PV.Z,
-; CM-NEXT: SETGT_INT T1.Y, T0.Z, literal.x, BS:VEC_120/SCL_212
-; CM-NEXT: CNDE_INT T0.Z, PV.Y, T6.X, PV.X,
-; CM-NEXT: CNDE_INT * T0.W, T5.Z, T7.X, T0.W, BS:VEC_201
-; CM-NEXT: 127(1.779649e-43), 0(0.000000e+00)
-; CM-NEXT: CNDE_INT T6.X, T2.W, PV.W, T1.Z,
-; CM-NEXT: LSHL T5.Y, PV.Z, literal.x,
-; CM-NEXT: CNDE_INT T0.Z, PV.Y, T4.Z, PV.X,
-; CM-NEXT: CNDE_INT * T0.W, T4.X, T3.Y, T2.Y,
-; CM-NEXT: 23(3.222986e-44), 0(0.000000e+00)
-; CM-NEXT: CNDE_INT T0.X, T2.X, PV.W, T0.X,
-; CM-NEXT: LSHL T2.Y, PV.Z, literal.x,
+; CM-NEXT: -127(nan), 254(3.559298e-43)
+; CM-NEXT: CNDE_INT T6.X, PV.W, PV.Z, PV.Y,
+; CM-NEXT: SETGT_INT T2.Y, T0.Z, literal.x,
+; CM-NEXT: CNDE_INT T0.Z, PV.X, T1.Y, T1.Z,
+; CM-NEXT: MUL_IEEE * T1.W, T7.X, literal.y,
+; CM-NEXT: 127(1.779649e-43), 2130706432(1.701412e+38)
+; CM-NEXT: CNDE_INT T7.X, T3.Z, T7.X, PV.W,
+; CM-NEXT: LSHL T1.Y, PV.Z, literal.x,
+; CM-NEXT: CNDE_INT T0.Z, PV.Y, T4.X, PV.X, BS:VEC_021/SCL_122
+; CM-NEXT: MUL_IEEE * T1.W, T0.Y, literal.y,
+; CM-NEXT: 23(3.222986e-44), 2130706432(1.701412e+38)
+; CM-NEXT: CNDE_INT T4.X, T0.W, T0.Y, PV.W,
+; CM-NEXT: LSHL T0.Y, PV.Z, literal.x,
; CM-NEXT: ADD_INT T0.Z, PV.Y, literal.y,
-; CM-NEXT: CNDE_INT * T0.W, T4.Y, PV.X, T3.X, BS:VEC_021/SCL_122
+; CM-NEXT: CNDE_INT * T0.W, T2.X, T5.X, PV.X,
; CM-NEXT: 23(3.222986e-44), 1065353216(1.000000e+00)
; CM-NEXT: MUL_IEEE T2.X, PV.W, PV.Z,
-; CM-NEXT: SETGT T3.Y, literal.x, KC0[3].W,
+; CM-NEXT: SETGT T1.Y, literal.x, KC0[3].W,
; CM-NEXT: ADD_INT T0.Z, PV.Y, literal.y,
-; CM-NEXT: CNDE_INT * T0.W, T1.Y, PV.X, T5.X,
+; CM-NEXT: CNDE_INT * T0.W, T2.Y, T3.X, PV.X,
; CM-NEXT: -1026650416(-1.032789e+02), 1065353216(1.000000e+00)
-; CM-NEXT: MUL_IEEE T0.X, PV.W, PV.Z,
-; CM-NEXT: SETGT T1.Y, literal.x, KC0[3].Z,
+; CM-NEXT: MUL_IEEE T3.X, PV.W, PV.Z,
+; CM-NEXT: SETGT T0.Y, literal.x, KC0[3].Z,
; CM-NEXT: CNDE T0.Z, PV.Y, PV.X, 0.0,
; CM-NEXT: SETGT * T0.W, KC0[3].W, literal.y,
; CM-NEXT: -1026650416(-1.032789e+02), 1118925336(8.872284e+01)
; CM-NEXT: CNDE T2.X, PV.W, PV.Z, literal.x,
-; CM-NEXT: CNDE T1.Y, PV.Y, PV.X, 0.0,
+; CM-NEXT: CNDE T0.Y, PV.Y, PV.X, 0.0,
; CM-NEXT: SETGT T0.Z, KC0[3].Z, literal.y,
; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
; CM-NEXT: 2139095040(INF), 1118925336(8.872284e+01)
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: LSHR T0.X, PV.W, literal.x,
-; CM-NEXT: CNDE T1.Y, PV.Z, PV.Y, literal.y,
-; CM-NEXT: CNDE T0.Z, T1.X, T0.Y, 0.0,
+; CM-NEXT: LSHR T3.X, PV.W, literal.x,
+; CM-NEXT: CNDE T0.Y, PV.Z, PV.Y, literal.y,
+; CM-NEXT: CNDE T0.Z, T1.X, T0.X, 0.0,
; CM-NEXT: SETGT * T0.W, KC0[3].Y, literal.z,
; CM-NEXT: 2(2.802597e-45), 2139095040(INF)
; CM-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00)
-; CM-NEXT: CNDE * T1.X, PV.W, PV.Z, literal.x,
+; CM-NEXT: CNDE * T0.X, PV.W, PV.Z, literal.x,
; CM-NEXT: 2139095040(INF), 0(0.000000e+00)
-; CM-NEXT: LSHR * T3.X, KC0[2].Y, literal.x,
+; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%result = call <3 x float> @llvm.exp.v3f32(<3 x float> %in)
store <3 x float> %result, ptr addrspace(1) %out
@@ -2050,227 +2041,224 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
; R600-LABEL: s_exp_v4f32:
; R600: ; %bb.0:
; R600-NEXT: ALU 98, @6, KC0[CB0:0-32], KC1[]
-; R600-NEXT: ALU 98, @105, KC0[CB0:0-32], KC1[]
-; R600-NEXT: ALU 24, @204, KC0[CB0:0-32], KC1[]
+; R600-NEXT: ALU 95, @105, KC0[CB0:0-32], KC1[]
+; R600-NEXT: ALU 24, @201, KC0[CB0:0-32], KC1[]
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1
; R600-NEXT: CF_END
; R600-NEXT: PAD
; R600-NEXT: ALU clause starting at 6:
; R600-NEXT: AND_INT * T0.W, KC0[3].Z, literal.x,
; R600-NEXT: -4096(nan), 0(0.000000e+00)
-; R600-NEXT: ADD T1.W, KC0[3].Z, -PV.W,
-; R600-NEXT: MUL_IEEE * T2.W, PV.W, literal.x,
+; R600-NEXT: ADD * T1.W, KC0[3].Z, -PV.W,
+; R600-NEXT: MUL_IEEE T2.W, PV.W, literal.x,
+; R600-NEXT: MUL_IEEE * T3.W, T0.W, literal.y,
+; R600-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
+; R600-NEXT: RNDNE T4.W, PS,
+; R600-NEXT: MULADD_IEEE * T1.W, T1.W, literal.x, PV.W, BS:VEC_021/SCL_122
; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
-; R600-NEXT: RNDNE T3.W, PS,
-; R600-NEXT: MUL_IEEE * T4.W, PV.W, literal.x,
+; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.x, PS,
+; R600-NEXT: ADD * T1.W, T3.W, -PV.W,
; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
-; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PS,
-; R600-NEXT: TRUNC * T4.W, PV.W,
-; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
-; R600-NEXT: FLT_TO_INT T0.Z, PS,
-; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.x, PV.W,
-; R600-NEXT: ADD * T1.W, T2.W, -T3.W,
-; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
-; R600-NEXT: ADD T1.Z, PS, PV.W,
-; R600-NEXT: MAX_INT T0.W, PV.Z, literal.x,
-; R600-NEXT: MIN_INT * T1.W, PV.Z, literal.y,
-; R600-NEXT: -330(nan), 381(5.338947e-43)
-; R600-NEXT: ADD_INT T0.X, PS, literal.x,
-; R600-NEXT: ADD_INT T0.Y, PV.W, literal.y,
-; R600-NEXT: ADD_INT T2.Z, T0.Z, literal.z,
-; R600-NEXT: SETGT_UINT T0.W, T0.Z, literal.w,
-; R600-NEXT: EXP_IEEE * T1.X, PV.Z,
-; R600-NEXT: -254(nan), 204(2.858649e-43)
-; R600-NEXT: 102(1.429324e-43), -229(nan)
-; R600-NEXT: ADD_INT T2.X, T0.Z, literal.x,
-; R600-NEXT: SETGT_UINT T1.Y, T0.Z, literal.y,
-; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
-; R600-NEXT: SETGT_INT T1.W, T0.Z, literal.x,
-; R600-NEXT: MUL_IEEE * T2.W, PS, literal.z,
-; R600-NEXT: -127(nan), 254(3.559298e-43)
-; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T3.X, T1.X, literal.x,
-; R600-NEXT: MUL_IEEE T0.Y, PS, literal.y,
-; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Z, T0.Z,
-; R600-NEXT: CNDE_INT T3.W, PV.Y, PV.X, T0.X,
-; R600-NEXT: SETGT_INT * T4.W, T0.Z, literal.z,
-; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31)
-; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00)
-; R600-NEXT: AND_INT T2.Y, KC0[4].X, literal.x,
-; R600-NEXT: CNDE_INT T0.Z, PS, PV.Z, PV.W,
-; R600-NEXT: CNDE_INT T0.W, T0.W, PV.Y, T2.W,
-; R600-NEXT: MUL_IEEE * T2.W, PV.X, literal.y,
-; R600-NEXT: -4096(nan), 2130706432(1.701412e+38)
-; R600-NEXT: CNDE_INT T0.X, T1.Y, T3.X, PS,
-; R600-NEXT: CNDE_INT T0.Y, T1.W, PV.W, T1.X,
-; R600-NEXT: LSHL T0.Z, PV.Z, literal.x,
-; R600-NEXT: ADD T0.W, KC0[4].X, -PV.Y,
-; R600-NEXT: MUL_IEEE * T1.W, PV.Y, literal.y,
-; R600-NEXT: 23(3.222986e-44), 1069064192(1.442383e+00)
-; R600-NEXT: RNDNE T1.Y, PS,
-; R600-NEXT: MUL_IEEE T1.Z, PV.W, literal.x,
-; R600-NEXT: ADD_INT T2.W, PV.Z, literal.y,
-; R600-NEXT: CNDE_INT * T3.W, T4.W, PV.Y, PV.X,
-; R600-NEXT: 967029397(3.122284e-04), 1065353216(1.000000e+00)
-; R600-NEXT: MUL_IEEE T0.Y, PS, PV.W,
-; R600-NEXT: AND_INT T0.Z, KC0[3].W, literal.x,
-; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.y, PV.Z,
-; R600-NEXT: TRUNC * T2.W, PV.Y,
-; R600-NEXT: -4096(nan), 1069064192(1.442383e+00)
-; R600-NEXT: SETGT T0.X, literal.x, KC0[3].Z,
-; R600-NEXT: FLT_TO_INT T3.Y, PS,
-; R600-NEXT: MULADD_IEEE T1.Z, T2.Y, literal.y, PV.W,
-; R600-NEXT: ADD T0.W, T1.W, -T1.Y,
-; R600-NEXT: MUL_IEEE * T1.W, PV.Z, literal.z,
-; R600-NEXT: -1026650416(-1.032789e+02), 967029397(3.122284e-04)
-; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
-; R600-NEXT: RNDNE T1.X, PS,
-; R600-NEXT: AND_INT T1.Y, KC0[3].Y, literal.x,
-; R600-NEXT: ADD T1.Z, PV.W, PV.Z,
-; R600-NEXT: MAX_INT T0.W, PV.Y, literal.y,
-; R600-NEXT: MIN_INT * T2.W, PV.Y, literal.z,
-; R600-NEXT: -4096(nan), -330(nan)
+; R600-NEXT: ADD T0.W, PS, PV.W,
+; R600-NEXT: TRUNC * T1.W, T4.W,
+; R600-NEXT: FLT_TO_INT T1.W, PS,
+; R600-NEXT: EXP_IEEE * T0.X, PV.W,
+; R600-NEXT: MUL_IEEE T0.Z, PS, literal.x,
+; R600-NEXT: MAX_INT T0.W, PV.W, literal.y,
+; R600-NEXT: MIN_INT * T2.W, PV.W, literal.z,
+; R600-NEXT: 209715200(1.972152e-31), -330(nan)
; R600-NEXT: 381(5.338947e-43), 0(0.000000e+00)
-; R600-NEXT: ADD_INT T2.X, PS, literal.x,
-; R600-NEXT: ADD_INT T2.Y, PV.W, literal.y,
-; R600-NEXT: ADD_INT T2.Z, T3.Y, literal.z,
-; R600-NEXT: SETGT_UINT T0.W, T3.Y, literal.w,
-; R600-NEXT: EXP_IEEE * T1.Z, PV.Z,
-; R600-NEXT: -254(nan), 204(2.858649e-43)
-; R600-NEXT: 102(1.429324e-43), -229(nan)
-; R600-NEXT: ADD_INT T3.X, T3.Y, literal.x,
-; R600-NEXT: SETGT_UINT T4.Y, T3.Y, literal.y,
-; R600-NEXT: CNDE_INT T2.Z, PV.W, PV.Y, PV.Z,
-; R600-NEXT: SETGT_INT T2.W, T3.Y, literal.x,
-; R600-NEXT: MUL_IEEE * T3.W, PS, literal.z,
+; R600-NEXT: ADD_INT T1.X, PS, literal.x,
+; R600-NEXT: AND_INT T0.Y, KC0[4].X, literal.y,
+; R600-NEXT: ADD_INT T1.Z, PV.W, literal.z,
+; R600-NEXT: ADD_INT * T0.W, T1.W, literal.w,
+; R600-NEXT: -254(nan), -4096(nan)
+; R600-NEXT: 204(2.858649e-43), 102(1.429324e-43)
+; R600-NEXT: SETGT_UINT * T2.W, T1.W, literal.x,
+; R600-NEXT: -229(nan), 0(0.000000e+00)
+; R600-NEXT: ADD_INT T2.X, T1.W, literal.x,
+; R600-NEXT: SETGT_UINT T1.Y, T1.W, literal.y,
+; R600-NEXT: CNDE_INT T1.Z, PV.W, T1.Z, T0.W,
+; R600-NEXT: SETGT_INT T0.W, T1.W, literal.x,
+; R600-NEXT: ADD * T3.W, KC0[4].X, -T0.Y,
; R600-NEXT: -127(nan), 254(3.559298e-43)
-; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T4.X, T1.Z, literal.x,
-; R600-NEXT: MUL_IEEE T2.Y, PS, literal.y,
-; R600-NEXT: CNDE_INT T2.Z, PV.W, PV.Z, T3.Y,
-; R600-NEXT: CNDE_INT T4.W, PV.Y, PV.X, T2.X,
-; R600-NEXT: SETGT_INT * T5.W, T3.Y, literal.z,
-; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31)
+; R600-NEXT: MUL_IEEE T3.X, PS, literal.x,
+; R600-NEXT: MUL_IEEE T2.Y, T0.Y, literal.y,
+; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Z, T1.W,
+; R600-NEXT: CNDE_INT T4.W, PV.Y, PV.X, T1.X,
+; R600-NEXT: SETGT_INT * T1.W, T1.W, literal.z,
+; R600-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00)
-; R600-NEXT: ADD T2.X, KC0[3].W, -T0.Z,
-; R600-NEXT: CNDE_INT T3.Y, PS, PV.Z, PV.W,
-; R600-NEXT: CNDE_INT * T2.Z, T0.W, PV.Y, T3.W,
-; R600-NEXT: ALU clause starting at 105:
-; R600-NEXT: MUL_IEEE T0.W, T4.X, literal.x,
-; R600-NEXT: ADD * T3.W, KC0[3].Y, -T1.Y,
+; R600-NEXT: CNDE_INT T1.X, PS, PV.Z, PV.W,
+; R600-NEXT: RNDNE T3.Y, PV.Y,
+; R600-NEXT: MULADD_IEEE T1.Z, T3.W, literal.x, PV.X,
+; R600-NEXT: MUL_IEEE T3.W, T0.Z, literal.y,
+; R600-NEXT: MUL_IEEE * T4.W, T0.X, literal.z,
+; R600-NEXT: 1069064192(1.442383e+00), 209715200(1.972152e-31)
; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T2.X, PS, literal.x,
+; R600-NEXT: CNDE_INT T4.Y, T2.W, PV.W, T0.Z,
+; R600-NEXT: MULADD_IEEE T0.Z, T0.Y, literal.y, PV.Z,
+; R600-NEXT: ADD T2.W, T2.Y, -PV.Y, BS:VEC_120/SCL_212
+; R600-NEXT: AND_INT * T3.W, KC0[3].Y, literal.z,
+; R600-NEXT: 2130706432(1.701412e+38), 967029397(3.122284e-04)
+; R600-NEXT: -4096(nan), 0(0.000000e+00)
; R600-NEXT: MUL_IEEE T3.X, PS, literal.x,
-; R600-NEXT: MUL_IEEE T2.Y, T1.Y, literal.y,
-; R600-NEXT: CNDE_INT T3.Z, T4.Y, T4.X, PV.W, BS:VEC_120/SCL_212
-; R600-NEXT: CNDE_INT T0.W, T2.W, T2.Z, T1.Z,
-; R600-NEXT: LSHL * T2.W, T3.Y, literal.z,
-; R600-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
-; R600-NEXT: 23(3.222986e-44), 0(0.000000e+00)
-; R600-NEXT: ADD_INT T4.X, PS, literal.x,
-; R600-NEXT: CNDE_INT T3.Y, T5.W, PV.W, PV.Z,
-; R600-NEXT: RNDNE T1.Z, PV.Y,
-; R600-NEXT: MULADD_IEEE T0.W, T3.W, literal.y, PV.X, BS:VEC_120/SCL_212
-; R600-NEXT: MUL_IEEE * T2.W, T2.X, literal.z,
+; R600-NEXT: ADD T0.Y, PV.W, PV.Z,
+; R600-NEXT: CNDE_INT T0.Z, T0.W, PV.Y, T0.X, BS:VEC_021/SCL_122
+; R600-NEXT: CNDE_INT T0.W, T1.Y, T4.W, PV.X,
+; R600-NEXT: LSHL * T2.W, T1.X, literal.y,
+; R600-NEXT: 1069064192(1.442383e+00), 23(3.222986e-44)
+; R600-NEXT: AND_INT T0.X, KC0[3].W, literal.x,
+; R600-NEXT: TRUNC T1.Y, T3.Y,
+; R600-NEXT: ADD_INT T1.Z, PS, literal.y,
+; R600-NEXT: CNDE_INT T0.W, T1.W, PV.Z, PV.W,
+; R600-NEXT: EXP_IEEE * T0.Y, PV.Y,
+; R600-NEXT: -4096(nan), 1065353216(1.000000e+00)
+; R600-NEXT: MUL_IEEE T1.X, PV.W, PV.Z,
+; R600-NEXT: FLT_TO_INT T1.Y, PV.Y,
+; R600-NEXT: MUL_IEEE T0.Z, PS, literal.x,
+; R600-NEXT: ADD T0.W, KC0[3].W, -PV.X,
+; R600-NEXT: RNDNE * T1.W, T3.X,
+; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
+; R600-NEXT: SETGT T2.X, literal.x, KC0[3].Z,
+; R600-NEXT: TRUNC T2.Y, PS,
+; R600-NEXT: MUL_IEEE T1.Z, PV.W, literal.y,
+; R600-NEXT: MUL_IEEE T2.W, PV.Z, literal.z,
+; R600-NEXT: MAX_INT * T4.W, PV.Y, literal.w,
+; R600-NEXT: -1026650416(-1.032789e+02), 967029397(3.122284e-04)
+; R600-NEXT: 209715200(1.972152e-31), -330(nan)
+; R600-NEXT: ADD T4.X, KC0[3].Y, -T3.W,
+; R600-NEXT: ADD_INT T3.Y, PS, literal.x,
+; R600-NEXT: ADD_INT T2.Z, T1.Y, literal.y,
+; R600-NEXT: SETGT_UINT T4.W, T1.Y, literal.z,
+; R600-NEXT: MIN_INT * T5.W, T1.Y, literal.w,
+; R600-NEXT: 204(2.858649e-43), 102(1.429324e-43)
+; R600-NEXT: -229(nan), 381(5.338947e-43)
+; R600-NEXT: ADD_INT T5.X, PS, literal.x,
+; R600-NEXT: ADD_INT T4.Y, T1.Y, literal.y,
+; R600-NEXT: SETGT_UINT T3.Z, T1.Y, literal.z,
+; R600-NEXT: CNDE_INT T5.W, PV.W, PV.Y, PV.Z,
+; R600-NEXT: SETGT_INT * T6.W, T1.Y, literal.y,
+; R600-NEXT: -254(nan), -127(nan)
+; R600-NEXT: 254(3.559298e-43), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T6.X, T0.Y, literal.x,
+; R600-NEXT: CNDE_INT T3.Y, PS, PV.W, T1.Y,
+; R600-NEXT: CNDE_INT * T2.Z, PV.Z, PV.Y, PV.X,
+; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
+; R600-NEXT: ALU clause starting at 105:
+; R600-NEXT: SETGT_INT T5.W, T1.Y, literal.x,
+; R600-NEXT: MUL_IEEE * T7.W, T4.X, literal.y,
+; R600-NEXT: 127(1.779649e-43), 967029397(3.122284e-04)
+; R600-NEXT: MUL_IEEE T5.X, T0.X, literal.x,
+; R600-NEXT: MULADD_IEEE T1.Y, T4.X, literal.x, PS, BS:VEC_120/SCL_212
+; R600-NEXT: CNDE_INT T2.Z, PV.W, T3.Y, T2.Z,
+; R600-NEXT: MUL_IEEE T7.W, T6.X, literal.y, BS:VEC_201
+; R600-NEXT: CNDE_INT * T2.W, T4.W, T2.W, T0.Z,
+; R600-NEXT: 1069064192(1.442383e+00), 2130706432(1.701412e+38)
+; R600-NEXT: CNDE_INT T4.X, T6.W, PS, T0.Y,
+; R600-NEXT: CNDE_INT T0.Y, T3.Z, T6.X, PV.W,
+; R600-NEXT: LSHL T0.Z, PV.Z, literal.x,
+; R600-NEXT: MULADD_IEEE T2.W, T3.W, literal.y, PV.Y, BS:VEC_201
+; R600-NEXT: ADD * T1.W, T3.X, -T1.W,
+; R600-NEXT: 23(3.222986e-44), 967029397(3.122284e-04)
+; R600-NEXT: ADD T3.X, PS, PV.W,
+; R600-NEXT: ADD_INT T1.Y, PV.Z, literal.x,
+; R600-NEXT: CNDE_INT T0.Z, T5.W, PV.X, PV.Y,
+; R600-NEXT: RNDNE T1.W, T5.X,
+; R600-NEXT: MULADD_IEEE * T0.W, T0.W, literal.y, T1.Z, BS:VEC_021/SCL_122
; R600-NEXT: 1065353216(1.000000e+00), 1069064192(1.442383e+00)
-; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
-; R600-NEXT: MULADD_IEEE T2.X, T2.X, literal.x, PS,
-; R600-NEXT: MULADD_IEEE T1.Y, T1.Y, literal.y, PV.W,
-; R600-NEXT: ADD T2.Z, T2.Y, -PV.Z, BS:VEC_120/SCL_212
-; R600-NEXT: MUL_IEEE T0.W, PV.Y, PV.X,
-; R600-NEXT: SETGT * T2.W, literal.z, KC0[4].X,
-; R600-NEXT: 1069064192(1.442383e+00), 967029397(3.122284e-04)
-; R600-NEXT: -1026650416(-1.032789e+02), 0(0.000000e+00)
-; R600-NEXT: CNDE T3.X, PS, PV.W, 0.0,
-; R600-NEXT: ADD T1.Y, PV.Z, PV.Y,
-; R600-NEXT: TRUNC T1.Z, T1.Z,
-; R600-NEXT: MULADD_IEEE T0.W, T0.Z, literal.x, PV.X, BS:VEC_120/SCL_212
-; R600-NEXT: ADD * T1.W, T1.W, -T1.X,
-; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
-; R600-NEXT: SETGT T2.X, KC0[4].X, literal.x,
-; R600-NEXT: ADD T2.Y, PS, PV.W,
-; R600-NEXT: FLT_TO_INT T0.Z, PV.Z,
-; R600-NEXT: TRUNC T0.W, T1.X,
-; R600-NEXT: EXP_IEEE * T1.X, PV.Y,
-; R600-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T4.X, PS, literal.x,
-; R600-NEXT: FLT_TO_INT T1.Y, PV.W,
-; R600-NEXT: MAX_INT T1.Z, PV.Z, literal.y,
-; R600-NEXT: MUL_IEEE T0.W, PS, literal.z,
-; R600-NEXT: EXP_IEEE * T1.W, PV.Y,
-; R600-NEXT: 2130706432(1.701412e+38), -330(nan)
+; R600-NEXT: MULADD_IEEE T0.X, T0.X, literal.x, PS,
+; R600-NEXT: ADD T0.Y, T5.X, -PV.W, BS:VEC_120/SCL_212
+; R600-NEXT: MUL_IEEE T0.Z, PV.Z, PV.Y,
+; R600-NEXT: SETGT T0.W, literal.y, KC0[4].X,
+; R600-NEXT: EXP_IEEE * T1.Y, PV.X,
+; R600-NEXT: 967029397(3.122284e-04), -1026650416(-1.032789e+02)
+; R600-NEXT: CNDE T3.X, PV.W, PV.Z, 0.0,
+; R600-NEXT: ADD T0.Y, PV.Y, PV.X,
+; R600-NEXT: FLT_TO_INT T0.Z, T2.Y,
+; R600-NEXT: TRUNC T0.W, T1.W,
+; R600-NEXT: MUL_IEEE * T1.W, PS, literal.x,
; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T5.X, PV.W, literal.x,
-; R600-NEXT: MUL_IEEE T2.Y, PS, literal.x,
-; R600-NEXT: ADD_INT T1.Z, PV.Z, literal.y,
-; R600-NEXT: ADD_INT T2.W, T0.Z, literal.z,
-; R600-NEXT: MAX_INT * T3.W, PV.Y, literal.w,
-; R600-NEXT: 209715200(1.972152e-31), 204(2.858649e-43)
-; R600-NEXT: 102(1.429324e-43), -330(nan)
-; R600-NEXT: SETGT_UINT T6.X, T0.Z, literal.x,
-; R600-NEXT: ADD_INT T3.Y, PS, literal.y,
-; R600-NEXT: ADD_INT T2.Z, T1.Y, literal.z,
-; R600-NEXT: SETGT_UINT T3.W, T1.Y, literal.x,
-; R600-NEXT: MIN_INT * T4.W, T1.Y, literal.w,
+; R600-NEXT: SETGT T0.X, KC0[4].X, literal.x,
+; R600-NEXT: MUL_IEEE T2.Y, PS, literal.y,
+; R600-NEXT: FLT_TO_INT T1.Z, PV.W,
+; R600-NEXT: MAX_INT T0.W, PV.Z, literal.z,
+; R600-NEXT: EXP_IEEE * T0.Y, PV.Y,
+; R600-NEXT: 1118925336(8.872284e+01), 209715200(1.972152e-31)
+; R600-NEXT: -330(nan), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T4.X, T1.Y, literal.x,
+; R600-NEXT: MUL_IEEE T3.Y, PS, literal.y,
+; R600-NEXT: ADD_INT T2.Z, PV.W, literal.z,
+; R600-NEXT: ADD_INT * T0.W, T0.Z, literal.w,
+; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31)
+; R600-NEXT: 204(2.858649e-43), 102(1.429324e-43)
+; R600-NEXT: MAX_INT * T2.W, T1.Z, literal.x,
+; R600-NEXT: -330(nan), 0(0.000000e+00)
+; R600-NEXT: SETGT_UINT T5.X, T0.Z, literal.x,
+; R600-NEXT: ADD_INT T4.Y, PV.W, literal.y,
+; R600-NEXT: ADD_INT T3.Z, T1.Z, literal.z, BS:VEC_120/SCL_212
+; R600-NEXT: SETGT_UINT T2.W, T1.Z, literal.x, BS:VEC_120/SCL_212
+; R600-NEXT: MIN_INT * T3.W, T1.Z, literal.w,
; R600-NEXT: -229(nan), 204(2.858649e-43)
; R600-NEXT: 102(1.429324e-43), 381(5.338947e-43)
-; R600-NEXT: ADD_INT T7.X, PS, literal.x,
-; R600-NEXT: ADD_INT T4.Y, T1.Y, literal.y,
-; R600-NEXT: SETGT_UINT T3.Z, T1.Y, literal.z,
-; R600-NEXT: CNDE_INT T4.W, PV.W, PV.Y, PV.Z,
-; R600-NEXT: SETGT_INT * T5.W, T1.Y, literal.y,
+; R600-NEXT: ADD_INT T6.X, PS, literal.x,
+; R600-NEXT: ADD_INT T5.Y, T1.Z, literal.y,
+; R600-NEXT: SETGT_UINT T4.Z, T1.Z, literal.z,
+; R600-NEXT: CNDE_INT T3.W, PV.W, PV.Y, PV.Z,
+; R600-NEXT: SETGT_INT * T4.W, T1.Z, literal.y,
; R600-NEXT: -254(nan), -127(nan)
; R600-NEXT: 254(3.559298e-43), 0(0.000000e+00)
-; R600-NEXT: CNDE_INT T8.X, PS, PV.W, T1.Y,
-; R600-NEXT: CNDE_INT T3.Y, PV.Z, PV.Y, PV.X,
-; R600-NEXT: SETGT_INT T2.Z, T1.Y, literal.x,
-; R600-NEXT: CNDE_INT T2.W, T6.X, T1.Z, T2.W,
-; R600-NEXT: SETGT_INT * T4.W, T0.Z, literal.y,
+; R600-NEXT: CNDE_INT T7.X, PS, PV.W, T1.Z, BS:VEC_021/SCL_122
+; R600-NEXT: CNDE_INT T4.Y, PV.Z, PV.Y, PV.X,
+; R600-NEXT: SETGT_INT T1.Z, T1.Z, literal.x, BS:VEC_120/SCL_212
+; R600-NEXT: CNDE_INT T0.W, T5.X, T2.Z, T0.W, BS:VEC_102/SCL_221
+; R600-NEXT: SETGT_INT * T3.W, T0.Z, literal.y,
; R600-NEXT: 127(1.779649e-43), -127(nan)
-; R600-NEXT: CNDE_INT T7.X, PS, PV.W, T0.Z,
-; R600-NEXT: CNDE_INT T1.Y, PV.Z, PV.X, PV.Y,
-; R600-NEXT: MIN_INT T1.Z, T0.Z, literal.x,
-; R600-NEXT: MUL_IEEE T2.W, T1.W, literal.y,
-; R600-NEXT: MUL_IEEE * T6.W, T2.Y, literal.z,
-; R600-NEXT: 381(5.338947e-43), 2130706432(1.701412e+38)
-; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
-; R600-NEXT: CNDE_INT T8.X, T3.W, PS, T2.Y,
-; R600-NEXT: MUL_IEEE T2.Y, PV.W, literal.x,
-; R600-NEXT: ADD_INT T1.Z, PV.Z, literal.y,
-; R600-NEXT: ADD_INT T3.W, T0.Z, literal.z,
-; R600-NEXT: SETGT_UINT * T6.W, T0.Z, literal.w,
+; R600-NEXT: CNDE_INT T6.X, PS, PV.W, T0.Z,
+; R600-NEXT: CNDE_INT T4.Y, PV.Z, PV.X, PV.Y,
+; R600-NEXT: MIN_INT T2.Z, T0.Z, literal.x,
+; R600-NEXT: MUL_IEEE T0.W, T3.Y, literal.y,
+; R600-NEXT: MUL_IEEE * T5.W, T0.Y, literal.z,
+; R600-NEXT: 381(5.338947e-43), 209715200(1.972152e-31)
+; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T7.X, PS, literal.x,
+; R600-NEXT: CNDE_INT T3.Y, T2.W, PV.W, T3.Y,
+; R600-NEXT: ADD_INT T2.Z, PV.Z, literal.y,
+; R600-NEXT: ADD_INT T0.W, T0.Z, literal.z,
+; R600-NEXT: SETGT_UINT * T2.W, T0.Z, literal.w,
; R600-NEXT: 2130706432(1.701412e+38), -254(nan)
; R600-NEXT: -127(nan), 254(3.559298e-43)
-; R600-NEXT: CNDE_INT T9.X, PS, PV.W, PV.Z,
-; R600-NEXT: SETGT_INT T3.Y, T0.Z, literal.x,
-; R600-NEXT: CNDE_INT T0.Z, T3.Z, T2.W, PV.Y, BS:VEC_120/SCL_212
-; R600-NEXT: CNDE_INT T1.W, T5.W, PV.X, T1.W, BS:VEC_021/SCL_122
-; R600-NEXT: LSHL * T2.W, T1.Y, literal.y,
+; R600-NEXT: CNDE_INT T8.X, PS, PV.W, PV.Z,
+; R600-NEXT: SETGT_INT T5.Y, T0.Z, literal.x,
+; R600-NEXT: CNDE_INT T0.Z, T4.W, PV.Y, T0.Y, BS:VEC_021/SCL_122
+; R600-NEXT: CNDE_INT T0.W, T4.Z, T5.W, PV.X, BS:VEC_120/SCL_212
+; R600-NEXT: LSHL * T4.W, T4.Y, literal.y,
; R600-NEXT: 127(1.779649e-43), 23(3.222986e-44)
-; R600-NEXT: ADD_INT T8.X, PS, literal.x,
-; R600-NEXT: CNDE_INT T1.Y, T2.Z, PV.W, PV.Z,
-; R600-NEXT: CNDE_INT T0.Z, PV.Y, T7.X, PV.X,
-; R600-NEXT: CNDE_INT * T0.W, T6.X, T5.X, T0.W, BS:VEC_021/SCL_122
-; R600-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE * T1.W, T4.X, literal.x,
-; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
-; R600-NEXT: CNDE_INT T4.X, T6.W, T4.X, PV.W,
-; R600-NEXT: CNDE_INT * T2.Y, T4.W, T0.W, T1.X, BS:VEC_120/SCL_212
-; R600-NEXT: ALU clause starting at 204:
+; R600-NEXT: ADD_INT T7.X, PS, literal.x,
+; R600-NEXT: CNDE_INT T0.Y, T1.Z, PV.Z, PV.W,
+; R600-NEXT: CNDE_INT T0.Z, PV.Y, T6.X, PV.X,
+; R600-NEXT: MUL_IEEE T0.W, T4.X, literal.y,
+; R600-NEXT: CNDE_INT * T1.W, T5.X, T2.Y, T1.W,
+; R600-NEXT: 1065353216(1.000000e+00), 2130706432(1.701412e+38)
+; R600-NEXT: CNDE_INT T5.X, T3.W, PS, T1.Y,
+; R600-NEXT: CNDE_INT * T1.Y, T2.W, T4.X, PV.W, BS:VEC_120/SCL_212
+; R600-NEXT: ALU clause starting at 201:
; R600-NEXT: LSHL T0.Z, T0.Z, literal.x,
-; R600-NEXT: MUL_IEEE T0.W, T1.Y, T8.X,
+; R600-NEXT: MUL_IEEE T0.W, T0.Y, T7.X,
; R600-NEXT: SETGT * T1.W, literal.y, KC0[3].W,
; R600-NEXT: 23(3.222986e-44), -1026650416(-1.032789e+02)
-; R600-NEXT: CNDE T1.X, PS, PV.W, 0.0,
-; R600-NEXT: SETGT T1.Y, KC0[3].W, literal.x,
+; R600-NEXT: CNDE T4.X, PS, PV.W, 0.0,
+; R600-NEXT: SETGT T0.Y, KC0[3].W, literal.x,
; R600-NEXT: ADD_INT T0.Z, PV.Z, literal.y,
-; R600-NEXT: CNDE_INT T0.W, T3.Y, T2.Y, T4.X, BS:VEC_120/SCL_212
-; R600-NEXT: CNDE * T1.W, T2.X, T3.X, literal.z,
+; R600-NEXT: CNDE_INT T0.W, T5.Y, T5.X, T1.Y, BS:VEC_102/SCL_221
+; R600-NEXT: CNDE * T1.W, T0.X, T3.X, literal.z,
; R600-NEXT: 1118925336(8.872284e+01), 1065353216(1.000000e+00)
; R600-NEXT: 2139095040(INF), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T2.X, PV.W, PV.Z,
+; R600-NEXT: MUL_IEEE T0.X, PV.W, PV.Z,
; R600-NEXT: SETGT T2.Y, literal.x, KC0[3].Y,
; R600-NEXT: CNDE T1.Z, PV.Y, PV.X, literal.y,
-; R600-NEXT: CNDE T0.W, T0.X, T0.Y, 0.0,
+; R600-NEXT: CNDE T0.W, T2.X, T1.X, 0.0,
; R600-NEXT: SETGT * T2.W, KC0[3].Z, literal.z,
; R600-NEXT: -1026650416(-1.032789e+02), 2139095040(INF)
; R600-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00)
@@ -2285,8 +2273,8 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
; CM-LABEL: s_exp_v4f32:
; CM: ; %bb.0:
; CM-NEXT: ALU 97, @6, KC0[CB0:0-32], KC1[]
-; CM-NEXT: ALU 100, @104, KC0[CB0:0-32], KC1[]
-; CM-NEXT: ALU 36, @205, KC0[CB0:0-32], KC1[]
+; CM-NEXT: ALU 97, @104, KC0[CB0:0-32], KC1[]
+; CM-NEXT: ALU 35, @202, KC0[CB0:0-32], KC1[]
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
@@ -2305,224 +2293,220 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
; CM-NEXT: MULADD_IEEE T0.X, T0.W, literal.x, PV.W,
; CM-NEXT: ADD T0.Y, T0.Z, -PV.Z,
-; CM-NEXT: MUL_IEEE T0.Z, PV.Y, literal.x,
-; CM-NEXT: MUL_IEEE * T0.W, T2.W, literal.y, BS:VEC_120/SCL_212
+; CM-NEXT: MUL_IEEE T0.Z, T2.W, literal.y, BS:VEC_120/SCL_212
+; CM-NEXT: MUL_IEEE * T0.W, PV.Y, literal.x,
; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
; CM-NEXT: TRUNC T1.X, T1.Z,
-; CM-NEXT: RNDNE T2.Y, PV.W,
-; CM-NEXT: MULADD_IEEE T0.Z, T1.Y, literal.x, PV.Z,
-; CM-NEXT: ADD * T1.W, PV.Y, PV.X,
+; CM-NEXT: MULADD_IEEE T1.Y, T1.Y, literal.x, PV.W,
+; CM-NEXT: RNDNE T1.Z, PV.Z,
+; CM-NEXT: ADD * T0.W, PV.Y, PV.X,
; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
+; CM-NEXT: EXP_IEEE T0.X, T0.W,
+; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
+; CM-NEXT: TRUNC T2.X, T1.Z,
+; CM-NEXT: MULADD_IEEE T0.Y, T2.W, literal.x, T1.Y,
+; CM-NEXT: FLT_TO_INT T2.Z, T1.X,
+; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.y,
+; CM-NEXT: 967029397(3.122284e-04), 209715200(1.972152e-31)
+; CM-NEXT: ADD T1.X, T0.Z, -T1.Z,
+; CM-NEXT: MUL_IEEE T1.Y, PV.W, literal.x,
+; CM-NEXT: MAX_INT T0.Z, PV.Z, literal.y,
+; CM-NEXT: MIN_INT * T1.W, PV.Z, literal.z,
+; CM-NEXT: 209715200(1.972152e-31), -330(nan)
+; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00)
+; CM-NEXT: ADD_INT T3.X, PV.W, literal.x,
+; CM-NEXT: ADD_INT T2.Y, PV.Z, literal.y,
+; CM-NEXT: ADD_INT T0.Z, T2.Z, literal.z,
+; CM-NEXT: SETGT_UINT * T1.W, T2.Z, literal.w,
+; CM-NEXT: -254(nan), 204(2.858649e-43)
+; CM-NEXT: 102(1.429324e-43), -229(nan)
+; CM-NEXT: ADD_INT T4.X, T2.Z, literal.x,
+; CM-NEXT: SETGT_UINT T3.Y, T2.Z, literal.y,
+; CM-NEXT: CNDE_INT T0.Z, PV.W, PV.Y, PV.Z,
+; CM-NEXT: SETGT_INT * T2.W, T2.Z, literal.x,
+; CM-NEXT: -127(nan), 254(3.559298e-43)
+; CM-NEXT: MUL_IEEE T5.X, T0.X, literal.x,
+; CM-NEXT: CNDE_INT T2.Y, PV.W, PV.Z, T2.Z,
+; CM-NEXT: CNDE_INT T0.Z, PV.Y, PV.X, T3.X,
+; CM-NEXT: SETGT_INT * T3.W, T2.Z, literal.y,
+; CM-NEXT: 2130706432(1.701412e+38), 127(1.779649e-43)
+; CM-NEXT: AND_INT T3.X, KC0[3].Z, literal.x,
+; CM-NEXT: CNDE_INT T2.Y, PV.W, PV.Y, PV.Z,
+; CM-NEXT: MUL_IEEE T0.Z, PV.X, literal.y,
+; CM-NEXT: CNDE_INT * T0.W, T1.W, T1.Y, T0.W,
+; CM-NEXT: -4096(nan), 2130706432(1.701412e+38)
+; CM-NEXT: CNDE_INT T0.X, T2.W, PV.W, T0.X,
+; CM-NEXT: CNDE_INT T1.Y, T3.Y, T5.X, PV.Z,
+; CM-NEXT: LSHL T0.Z, PV.Y, literal.x,
+; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.y,
+; CM-NEXT: 23(3.222986e-44), 1069064192(1.442383e+00)
+; CM-NEXT: RNDNE T4.X, PV.W,
+; CM-NEXT: ADD_INT T2.Y, PV.Z, literal.x,
+; CM-NEXT: CNDE_INT T0.Z, T3.W, PV.X, PV.Y,
+; CM-NEXT: ADD * T1.W, T1.X, T0.Y,
+; CM-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00)
; CM-NEXT: EXP_IEEE T0.X, T1.W,
; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W,
; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W,
; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W,
-; CM-NEXT: MULADD_IEEE T2.X, T2.W, literal.x, T0.Z,
-; CM-NEXT: ADD T0.Y, T0.W, -T2.Y, BS:VEC_120/SCL_212
-; CM-NEXT: FLT_TO_INT T0.Z, T1.X,
-; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.y,
-; CM-NEXT: 967029397(3.122284e-04), 209715200(1.972152e-31)
-; CM-NEXT: MUL_IEEE T1.X, PV.W, literal.x,
+; CM-NEXT: MUL_IEEE T1.X, T0.Z, T2.Y,
+; CM-NEXT: TRUNC T0.Y, T4.X,
+; CM-NEXT: FLT_TO_INT T0.Z, T2.X, BS:VEC_120/SCL_212
+; CM-NEXT: MUL_IEEE * T1.W, PV.X, literal.x,
+; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
+; CM-NEXT: MUL_IEEE T2.X, PV.W, literal.x,
; CM-NEXT: MUL_IEEE T1.Y, T0.X, literal.y,
; CM-NEXT: MAX_INT T1.Z, PV.Z, literal.z,
-; CM-NEXT: MIN_INT * T1.W, PV.Z, literal.w,
+; CM-NEXT: MIN_INT * T2.W, PV.Z, literal.w,
; CM-NEXT: 209715200(1.972152e-31), 2130706432(1.701412e+38)
; CM-NEXT: -330(nan), 381(5.338947e-43)
-; CM-NEXT: ADD_INT T3.X, PV.W, literal.x,
-; CM-NEXT: ADD_INT T3.Y, PV.Z, literal.y,
+; CM-NEXT: ADD_INT T5.X, PV.W, literal.x,
+; CM-NEXT: ADD_INT T2.Y, PV.Z, literal.y,
; CM-NEXT: ADD_INT T1.Z, T0.Z, literal.z,
-; CM-NEXT: SETGT_UINT * T1.W, T0.Z, literal.w,
+; CM-NEXT: SETGT_UINT * T2.W, T0.Z, literal.w,
; CM-NEXT: -254(nan), 204(2.858649e-43)
; CM-NEXT: 102(1.429324e-43), -229(nan)
-; CM-NEXT: ADD_INT T4.X, T0.Z, literal.x,
-; CM-NEXT: SETGT_UINT T4.Y, T0.Z, literal.y,
+; CM-NEXT: ADD_INT T6.X, T0.Z, literal.x,
+; CM-NEXT: SETGT_UINT T3.Y, T0.Z, literal.y,
; CM-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
-; CM-NEXT: SETGT_INT * T2.W, T0.Z, literal.x,
+; CM-NEXT: SETGT_INT * T3.W, T0.Z, literal.x,
; CM-NEXT: -127(nan), 254(3.559298e-43)
-; CM-NEXT: CNDE_INT T5.X, PV.W, PV.Z, T0.Z,
-; CM-NEXT: CNDE_INT T3.Y, PV.Y, PV.X, T3.X,
-; CM-NEXT: SETGT_INT T0.Z, T0.Z, literal.x,
-; CM-NEXT: MUL_IEEE * T3.W, T1.Y, literal.y,
-; CM-NEXT: 127(1.779649e-43), 2130706432(1.701412e+38)
-; CM-NEXT: CNDE_INT T3.X, T4.Y, T1.Y, PV.W,
-; CM-NEXT: AND_INT T1.Y, KC0[3].Z, literal.x,
-; CM-NEXT: CNDE_INT T1.Z, PV.Z, PV.X, PV.Y,
-; CM-NEXT: CNDE_INT * T0.W, T1.W, T1.X, T0.W,
-; CM-NEXT: -4096(nan), 0(0.000000e+00)
-; CM-NEXT: CNDE_INT T0.X, T2.W, PV.W, T0.X,
-; CM-NEXT: LSHL T3.Y, PV.Z, literal.x,
-; CM-NEXT: TRUNC T1.Z, T2.Y,
-; CM-NEXT: ADD * T0.W, KC0[3].Z, -PV.Y,
-; CM-NEXT: 23(3.222986e-44), 0(0.000000e+00)
-; CM-NEXT: MUL_IEEE T1.X, PV.W, literal.x,
-; CM-NEXT: FLT_TO_INT T2.Y, PV.Z,
-; CM-NEXT: ADD_INT T1.Z, PV.Y, literal.y,
-; CM-NEXT: CNDE_INT * T1.W, T0.Z, PV.X, T3.X,
-; CM-NEXT: 967029397(3.122284e-04), 1065353216(1.000000e+00)
-; CM-NEXT: MUL_IEEE T0.X, PV.W, PV.Z,
-; CM-NEXT: MIN_INT T3.Y, PV.Y, literal.x,
-; CM-NEXT: MULADD_IEEE T0.Z, T0.W, literal.y, PV.X,
-; CM-NEXT: ADD * T0.W, T0.Y, T2.X,
-; CM-NEXT: 381(5.338947e-43), 1069064192(1.442383e+00)
-; CM-NEXT: EXP_IEEE T0.X (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE T0.Y, T0.W,
-; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
-; CM-NEXT: MULADD_IEEE T1.X, T1.Y, literal.x, T0.Z,
-; CM-NEXT: MUL_IEEE T4.Y, PV.Y, literal.y,
-; CM-NEXT: ADD_INT T0.Z, T3.Y, literal.z, BS:VEC_120/SCL_212
-; CM-NEXT: MAX_INT * T0.W, T2.Y, literal.w, BS:VEC_201
-; CM-NEXT: 967029397(3.122284e-04), 2130706432(1.701412e+38)
-; CM-NEXT: -254(nan), -330(nan)
-; CM-NEXT: ADD_INT T2.X, T2.Y, literal.x,
-; CM-NEXT: ADD_INT T3.Y, PV.W, literal.y,
-; CM-NEXT: ADD_INT T1.Z, T2.Y, literal.z,
-; CM-NEXT: SETGT_UINT * T0.W, T2.Y, literal.w,
-; CM-NEXT: -127(nan), 204(2.858649e-43)
-; CM-NEXT: 102(1.429324e-43), -229(nan)
-; CM-NEXT: SETGT_UINT T3.X, T2.Y, literal.x,
-; CM-NEXT: CNDE_INT T3.Y, PV.W, PV.Y, PV.Z,
-; CM-NEXT: SETGT_INT T1.Z, T2.Y, literal.y,
-; CM-NEXT: MUL_IEEE * T1.W, T0.Y, literal.z, BS:VEC_120/SCL_212
-; CM-NEXT: 254(3.559298e-43), -127(nan)
-; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
-; CM-NEXT: MUL_IEEE T4.X, PV.W, literal.x,
-; CM-NEXT: CNDE_INT * T3.Y, PV.Z, PV.Y, T2.Y,
-; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
-; CM-NEXT: ALU clause starting at 104:
-; CM-NEXT: CNDE_INT T0.Z, T3.X, T2.X, T0.Z,
-; CM-NEXT: SETGT_INT * T2.W, T2.Y, literal.x,
+; CM-NEXT: CNDE_INT T7.X, PV.W, PV.Z, T0.Z,
+; CM-NEXT: CNDE_INT T2.Y, PV.Y, PV.X, T5.X,
+; CM-NEXT: SETGT_INT * T0.Z, T0.Z, literal.x,
; CM-NEXT: 127(1.779649e-43), 0(0.000000e+00)
-; CM-NEXT: MUL_IEEE T2.X, T1.Y, literal.x,
-; CM-NEXT: CNDE_INT T1.Y, PV.W, T3.Y, PV.Z,
-; CM-NEXT: CNDE_INT T0.Z, T0.W, T4.X, T1.W,
-; CM-NEXT: MUL_IEEE * T0.W, T4.Y, literal.y, BS:VEC_201
-; CM-NEXT: 1069064192(1.442383e+00), 2130706432(1.701412e+38)
-; CM-NEXT: AND_INT T4.X, KC0[4].X, literal.x,
-; CM-NEXT: CNDE_INT T2.Y, T3.X, T4.Y, PV.W,
-; CM-NEXT: CNDE_INT T0.Z, T1.Z, PV.Z, T0.Y,
-; CM-NEXT: LSHL * T0.W, PV.Y, literal.y,
-; CM-NEXT: -4096(nan), 23(3.222986e-44)
-; CM-NEXT: ADD_INT T3.X, PV.W, literal.x,
-; CM-NEXT: CNDE_INT T0.Y, T2.W, PV.Z, PV.Y,
-; CM-NEXT: MUL_IEEE T0.Z, PV.X, literal.y,
-; CM-NEXT: RNDNE * T0.W, T2.X,
-; CM-NEXT: 1065353216(1.000000e+00), 1069064192(1.442383e+00)
-; CM-NEXT: ADD T2.X, T2.X, -PV.W,
-; CM-NEXT: RNDNE T1.Y, PV.Z,
-; CM-NEXT: MUL_IEEE T1.Z, PV.Y, PV.X,
-; CM-NEXT: SETGT * T1.W, literal.x, KC0[3].W,
-; CM-NEXT: -1026650416(-1.032789e+02), 0(0.000000e+00)
-; CM-NEXT: CNDE T3.X, PV.W, PV.Z, 0.0,
-; CM-NEXT: TRUNC T0.Y, T0.W,
-; CM-NEXT: TRUNC T1.Z, PV.Y,
-; CM-NEXT: ADD * T0.W, PV.X, T1.X,
+; CM-NEXT: ALU clause starting at 104:
+; CM-NEXT: ADD * T4.W, KC0[3].Z, -T3.X,
+; CM-NEXT: MUL_IEEE T5.X, PV.W, literal.x,
+; CM-NEXT: CNDE_INT T2.Y, T0.Z, T7.X, T2.Y,
+; CM-NEXT: MUL_IEEE T1.Z, T1.Y, literal.y,
+; CM-NEXT: CNDE_INT * T1.W, T2.W, T2.X, T1.W, BS:VEC_021/SCL_122
+; CM-NEXT: 967029397(3.122284e-04), 2130706432(1.701412e+38)
+; CM-NEXT: CNDE_INT T0.X, T3.W, PV.W, T0.X,
+; CM-NEXT: CNDE_INT T1.Y, T3.Y, T1.Y, PV.Z,
+; CM-NEXT: LSHL T1.Z, PV.Y, literal.x,
+; CM-NEXT: MULADD_IEEE * T1.W, T4.W, literal.y, PV.X, BS:VEC_120/SCL_212
+; CM-NEXT: 23(3.222986e-44), 1069064192(1.442383e+00)
+; CM-NEXT: MULADD_IEEE T2.X, T3.X, literal.x, PV.W,
+; CM-NEXT: ADD T2.Y, T0.W, -T4.X,
+; CM-NEXT: ADD_INT T1.Z, PV.Z, literal.y,
+; CM-NEXT: CNDE_INT * T0.W, T0.Z, PV.X, PV.Y,
+; CM-NEXT: 967029397(3.122284e-04), 1065353216(1.000000e+00)
+; CM-NEXT: AND_INT T0.X, KC0[4].X, literal.x,
+; CM-NEXT: MUL_IEEE T1.Y, PV.W, PV.Z,
+; CM-NEXT: SETGT T0.Z, literal.y, KC0[3].W,
+; CM-NEXT: ADD * T0.W, PV.Y, PV.X,
+; CM-NEXT: -4096(nan), -1026650416(-1.032789e+02)
; CM-NEXT: EXP_IEEE T0.X (MASKED), T0.W,
; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
; CM-NEXT: EXP_IEEE * T0.W, T0.W,
-; CM-NEXT: FLT_TO_INT T1.X, T1.Z,
-; CM-NEXT: FLT_TO_INT T0.Y, T0.Y,
-; CM-NEXT: MUL_IEEE T1.Z, PV.W, literal.x,
-; CM-NEXT: ADD * T1.W, KC0[4].X, -T4.X,
-; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
-; CM-NEXT: MUL_IEEE T2.X, PV.W, literal.x,
-; CM-NEXT: MUL_IEEE T2.Y, T0.W, literal.y,
-; CM-NEXT: MUL_IEEE T2.Z, PV.Z, literal.z,
-; CM-NEXT: SETGT_UINT * T2.W, PV.Y, literal.w,
-; CM-NEXT: 967029397(3.122284e-04), 209715200(1.972152e-31)
-; CM-NEXT: 2130706432(1.701412e+38), 254(3.559298e-43)
-; CM-NEXT: CNDE_INT T5.X, PV.W, T1.Z, PV.Z,
-; CM-NEXT: MUL_IEEE T3.Y, PV.Y, literal.x,
-; CM-NEXT: MULADD_IEEE T1.Z, T1.W, literal.y, PV.X,
-; CM-NEXT: MAX_INT * T1.W, T1.X, literal.z,
-; CM-NEXT: 209715200(1.972152e-31), 1069064192(1.442383e+00)
-; CM-NEXT: -330(nan), 0(0.000000e+00)
-; CM-NEXT: ADD_INT T2.X, PV.W, literal.x,
-; CM-NEXT: ADD_INT T4.Y, T1.X, literal.y,
-; CM-NEXT: MULADD_IEEE T1.Z, T4.X, literal.z, PV.Z, BS:VEC_120/SCL_212
-; CM-NEXT: MAX_INT * T1.W, T0.Y, literal.w,
-; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43)
+; CM-NEXT: CNDE T2.X, T0.Z, T1.Y, 0.0,
+; CM-NEXT: ADD T1.Y, KC0[4].X, -T0.X,
+; CM-NEXT: FLT_TO_INT T0.Z, T0.Y,
+; CM-NEXT: MUL_IEEE * T1.W, PV.W, literal.x,
+; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
+; CM-NEXT: MUL_IEEE T3.X, PV.W, literal.x,
+; CM-NEXT: SETGT_UINT T0.Y, PV.Z, literal.y,
+; CM-NEXT: MUL_IEEE T1.Z, PV.Y, literal.z,
+; CM-NEXT: MUL_IEEE * T2.W, T0.X, literal.w,
+; CM-NEXT: 209715200(1.972152e-31), -229(nan)
+; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
+; CM-NEXT: RNDNE T4.X, PV.W,
+; CM-NEXT: MULADD_IEEE T1.Y, T1.Y, literal.x, PV.Z,
+; CM-NEXT: CNDE_INT T1.Z, PV.Y, PV.X, T1.W,
+; CM-NEXT: SETGT_INT * T1.W, T0.Z, literal.y,
+; CM-NEXT: 1069064192(1.442383e+00), -127(nan)
+; CM-NEXT: CNDE_INT T3.X, PV.W, PV.Z, T0.W,
+; CM-NEXT: MULADD_IEEE T1.Y, T0.X, literal.x, PV.Y,
+; CM-NEXT: ADD T1.Z, T2.W, -PV.X,
+; CM-NEXT: MAX_INT * T2.W, T0.Z, literal.y,
; CM-NEXT: 967029397(3.122284e-04), -330(nan)
-; CM-NEXT: ADD T4.X, T0.Z, -T1.Y,
-; CM-NEXT: ADD_INT T1.Y, PV.W, literal.x,
-; CM-NEXT: ADD_INT T0.Z, T0.Y, literal.y,
-; CM-NEXT: SETGT_UINT * T1.W, T0.Y, literal.z,
+; CM-NEXT: ADD_INT T0.X, PV.W, literal.x,
+; CM-NEXT: ADD_INT T2.Y, T0.Z, literal.y,
+; CM-NEXT: TRUNC T2.Z, T4.X,
+; CM-NEXT: ADD * T2.W, PV.Z, PV.Y,
; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43)
-; CM-NEXT: -229(nan), 0(0.000000e+00)
-; CM-NEXT: SETGT_UINT T6.X, T1.X, literal.x,
-; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
-; CM-NEXT: SETGT_INT T0.Z, T0.Y, literal.y,
-; CM-NEXT: ADD * T3.W, PV.X, T1.Z,
-; CM-NEXT: -229(nan), -127(nan)
-; CM-NEXT: EXP_IEEE T1.X (MASKED), T3.W,
-; CM-NEXT: EXP_IEEE T1.Y (MASKED), T3.W,
-; CM-NEXT: EXP_IEEE T1.Z, T3.W,
-; CM-NEXT: EXP_IEEE * T1.W (MASKED), T3.W,
-; CM-NEXT: CNDE_INT T4.X, T0.Z, T1.Y, T0.Y,
-; CM-NEXT: CNDE_INT T1.Y, T6.X, T2.X, T4.Y, BS:VEC_120/SCL_212
-; CM-NEXT: SETGT_INT T2.Z, T1.X, literal.x,
-; CM-NEXT: MUL_IEEE * T3.W, PV.Z, literal.y,
-; CM-NEXT: -127(nan), 209715200(1.972152e-31)
-; CM-NEXT: MUL_IEEE T2.X, T1.Z, literal.x,
-; CM-NEXT: MUL_IEEE T4.Y, PV.W, literal.y,
-; CM-NEXT: CNDE_INT T3.Z, PV.Z, PV.Y, T1.X,
-; CM-NEXT: MIN_INT * T4.W, T1.X, literal.z,
+; CM-NEXT: EXP_IEEE T1.X (MASKED), T2.W,
+; CM-NEXT: EXP_IEEE T1.Y, T2.W,
+; CM-NEXT: EXP_IEEE T1.Z (MASKED), T2.W,
+; CM-NEXT: EXP_IEEE * T1.W (MASKED), T2.W,
+; CM-NEXT: MUL_IEEE T4.X, T0.W, literal.x,
+; CM-NEXT: FLT_TO_INT T3.Y, T2.Z,
+; CM-NEXT: MUL_IEEE T1.Z, PV.Y, literal.y,
+; CM-NEXT: CNDE_INT * T0.W, T0.Y, T0.X, T2.Y,
; CM-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31)
+; CM-NEXT: CNDE_INT T0.X, T1.W, PV.W, T0.Z,
+; CM-NEXT: MUL_IEEE T0.Y, PV.Z, literal.x,
+; CM-NEXT: MAX_INT T2.Z, PV.Y, literal.y,
+; CM-NEXT: MIN_INT * T0.W, PV.Y, literal.z,
+; CM-NEXT: 209715200(1.972152e-31), -330(nan)
; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00)
-; CM-NEXT: MIN_INT T7.X, T0.Y, literal.x,
-; CM-NEXT: ADD_INT T1.Y, PV.W, literal.y,
-; CM-NEXT: ADD_INT T4.Z, T1.X, literal.z,
-; CM-NEXT: SETGT_UINT * T4.W, T1.X, literal.w,
-; CM-NEXT: 381(5.338947e-43), -254(nan)
+; CM-NEXT: ADD_INT T5.X, PV.W, literal.x,
+; CM-NEXT: ADD_INT T2.Y, PV.Z, literal.y,
+; CM-NEXT: ADD_INT T2.Z, T3.Y, literal.z,
+; CM-NEXT: SETGT_UINT * T0.W, T3.Y, literal.w,
+; CM-NEXT: -254(nan), 204(2.858649e-43)
+; CM-NEXT: 102(1.429324e-43), -229(nan)
+; CM-NEXT: ADD_INT T6.X, T3.Y, literal.x,
+; CM-NEXT: SETGT_UINT T4.Y, T3.Y, literal.y,
+; CM-NEXT: CNDE_INT T2.Z, PV.W, PV.Y, PV.Z,
+; CM-NEXT: SETGT_INT * T1.W, T3.Y, literal.x,
; CM-NEXT: -127(nan), 254(3.559298e-43)
-; CM-NEXT: CNDE_INT T8.X, PV.W, PV.Z, PV.Y,
-; CM-NEXT: SETGT_INT T1.Y, T1.X, literal.x,
-; CM-NEXT: ADD_INT T4.Z, PV.X, literal.y,
-; CM-NEXT: ADD_INT * T5.W, T0.Y, literal.z,
+; CM-NEXT: MUL_IEEE T7.X, T1.Y, literal.x,
+; CM-NEXT: CNDE_INT T2.Y, PV.W, PV.Z, T3.Y,
+; CM-NEXT: CNDE_INT T2.Z, PV.Y, PV.X, T5.X,
+; CM-NEXT: MIN_INT * T2.W, T0.Z, literal.y,
+; CM-NEXT: 2130706432(1.701412e+38), 381(5.338947e-43)
+; CM-NEXT: SETGT_INT T5.X, T3.Y, literal.x,
+; CM-NEXT: ADD_INT T3.Y, PV.W, literal.y,
+; CM-NEXT: ADD_INT T3.Z, T0.Z, literal.z,
+; CM-NEXT: SETGT_UINT * T2.W, T0.Z, literal.w,
; CM-NEXT: 127(1.779649e-43), -254(nan)
-; CM-NEXT: -127(nan), 0(0.000000e+00)
-; CM-NEXT: CNDE_INT T1.X, T2.W, PV.W, PV.Z,
-; CM-NEXT: CNDE_INT T5.Y, PV.Y, T3.Z, PV.X,
-; CM-NEXT: CNDE_INT T3.Z, T6.X, T4.Y, T3.W,
-; CM-NEXT: MUL_IEEE * T2.W, T2.X, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT: -127(nan), 254(3.559298e-43)
+; CM-NEXT: CNDE_INT T6.X, PV.W, PV.Z, PV.Y,
+; CM-NEXT: CNDE_INT T2.Y, PV.X, T2.Y, T2.Z,
+; CM-NEXT: MUL_IEEE T2.Z, T7.X, literal.x,
+; CM-NEXT: CNDE_INT * T0.W, T0.W, T0.Y, T1.Z, BS:VEC_021/SCL_122
; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
-; CM-NEXT: SETGT_INT T6.X, T0.Y, literal.x,
-; CM-NEXT: CNDE_INT T0.Y, T4.W, T2.X, PV.W,
-; CM-NEXT: CNDE_INT * T1.Z, T2.Z, PV.Z, T1.Z,
-; CM-NEXT: 127(1.779649e-43), 0(0.000000e+00)
-; CM-NEXT: ALU clause starting at 205:
-; CM-NEXT: LSHL * T2.W, T5.Y, literal.x,
-; CM-NEXT: 23(3.222986e-44), 0(0.000000e+00)
-; CM-NEXT: ADD_INT T2.X, PV.W, literal.x,
-; CM-NEXT: CNDE_INT T0.Y, T1.Y, T1.Z, T0.Y,
-; CM-NEXT: CNDE_INT * T1.Z, T6.X, T4.X, T1.X,
+; CM-NEXT: SETGT_INT T8.X, T0.Z, literal.x,
+; CM-NEXT: CNDE_INT T0.Y, T1.W, PV.W, T1.Y,
+; CM-NEXT: CNDE_INT T0.Z, T4.Y, T7.X, PV.Z,
+; CM-NEXT: LSHL * T0.W, PV.Y, literal.y,
+; CM-NEXT: 127(1.779649e-43), 23(3.222986e-44)
+; CM-NEXT: ALU clause starting at 202:
+; CM-NEXT: ADD_INT T7.X, T0.W, literal.x,
+; CM-NEXT: CNDE_INT * T0.Y, T5.X, T0.Y, T0.Z,
; CM-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00)
-; CM-NEXT: CNDE_INT * T1.W, T1.W, T3.Y, T2.Y,
-; CM-NEXT: CNDE_INT T1.X, T0.Z, PV.W, T0.W,
-; CM-NEXT: LSHL T1.Y, T1.Z, literal.x, BS:VEC_120/SCL_212
-; CM-NEXT: MUL_IEEE T0.Z, T0.Y, T2.X,
+; CM-NEXT: CNDE_INT * T0.Z, T8.X, T0.X, T6.X,
+; CM-NEXT: MUL_IEEE * T0.W, T4.X, literal.x,
+; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
+; CM-NEXT: CNDE_INT T0.X, T2.W, T4.X, PV.W,
+; CM-NEXT: LSHL T1.Y, T0.Z, literal.x,
+; CM-NEXT: MUL_IEEE T0.Z, T0.Y, T7.X, BS:VEC_021/SCL_122
; CM-NEXT: SETGT * T0.W, literal.y, KC0[4].X,
; CM-NEXT: 23(3.222986e-44), -1026650416(-1.032789e+02)
-; CM-NEXT: CNDE T2.X, PV.W, PV.Z, 0.0,
+; CM-NEXT: CNDE T4.X, PV.W, PV.Z, 0.0,
; CM-NEXT: SETGT T0.Y, KC0[4].X, literal.x,
; CM-NEXT: ADD_INT T0.Z, PV.Y, literal.y,
-; CM-NEXT: CNDE_INT * T0.W, T6.X, PV.X, T5.X,
+; CM-NEXT: CNDE_INT * T0.W, T8.X, T3.X, PV.X,
; CM-NEXT: 1118925336(8.872284e+01), 1065353216(1.000000e+00)
-; CM-NEXT: SETGT T1.X, KC0[3].W, literal.x,
+; CM-NEXT: SETGT T0.X, KC0[3].W, literal.x,
; CM-NEXT: MUL_IEEE T1.Y, PV.W, PV.Z,
; CM-NEXT: SETGT T0.Z, literal.y, KC0[3].Z,
; CM-NEXT: CNDE * T0.W, PV.Y, PV.X, literal.z,
; CM-NEXT: 1118925336(8.872284e+01), -1026650416(-1.032789e+02)
; CM-NEXT: 2139095040(INF), 0(0.000000e+00)
-; CM-NEXT: SETGT T2.X, literal.x, KC0[3].Y,
+; CM-NEXT: SETGT T3.X, literal.x, KC0[3].Y,
; CM-NEXT: CNDE T0.Y, PV.Z, PV.Y, 0.0,
-; CM-NEXT: CNDE T0.Z, PV.X, T3.X, literal.y,
+; CM-NEXT: CNDE T0.Z, PV.X, T2.X, literal.y,
; CM-NEXT: SETGT * T1.W, KC0[3].Z, literal.z,
; CM-NEXT: -1026650416(-1.032789e+02), 2139095040(INF)
; CM-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00)
; CM-NEXT: CNDE T0.Y, PV.W, PV.Y, literal.x,
-; CM-NEXT: CNDE T1.Z, PV.X, T0.X, 0.0,
+; CM-NEXT: CNDE T1.Z, PV.X, T1.X, 0.0,
; CM-NEXT: SETGT * T1.W, KC0[3].Y, literal.y,
; CM-NEXT: 2139095040(INF), 1118925336(8.872284e+01)
; CM-NEXT: CNDE * T0.X, PV.W, PV.Z, literal.x,
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
index 544c1de6c7bb7..a162949587481 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
@@ -230,23 +230,23 @@ define amdgpu_kernel void @s_exp10_f32(ptr addrspace(1) %out, float %in) {
; R600-NEXT: MUL_IEEE * T2.W, PS, literal.z,
; R600-NEXT: -127(nan), 254(3.559298e-43)
; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T3.X, T1.X, literal.x,
-; R600-NEXT: MUL_IEEE T0.Y, PS, literal.y,
+; R600-NEXT: MUL_IEEE T3.X, PS, literal.x,
+; R600-NEXT: MUL_IEEE T0.Y, T1.X, literal.y,
; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Z, T0.Z,
; R600-NEXT: CNDE_INT T3.W, PV.Y, PV.X, T0.X,
; R600-NEXT: SETGT_INT * T4.W, T0.Z, literal.z,
-; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31)
+; R600-NEXT: 209715200(1.972152e-31), 2130706432(1.701412e+38)
; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00)
; R600-NEXT: CNDE_INT T0.Z, PS, PV.Z, PV.W,
-; R600-NEXT: CNDE_INT T0.W, T0.W, PV.Y, T2.W,
-; R600-NEXT: MUL_IEEE * T2.W, PV.X, literal.x,
+; R600-NEXT: MUL_IEEE T3.W, PV.Y, literal.x,
+; R600-NEXT: CNDE_INT * T0.W, T0.W, PV.X, T2.W,
; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
-; R600-NEXT: CNDE_INT T1.Z, T1.Y, T3.X, PS,
-; R600-NEXT: CNDE_INT T0.W, T1.W, PV.W, T1.X,
+; R600-NEXT: CNDE_INT T1.Z, T1.W, PS, T1.X,
+; R600-NEXT: CNDE_INT T0.W, T1.Y, T0.Y, PV.W,
; R600-NEXT: LSHL * T1.W, PV.Z, literal.x,
; R600-NEXT: 23(3.222986e-44), 0(0.000000e+00)
; R600-NEXT: ADD_INT T1.W, PS, literal.x,
-; R600-NEXT: CNDE_INT * T0.W, T4.W, PV.W, PV.Z,
+; R600-NEXT: CNDE_INT * T0.W, T4.W, PV.Z, PV.W,
; R600-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00)
; R600-NEXT: MUL_IEEE T0.W, PS, PV.W,
; R600-NEXT: SETGT * T1.W, literal.x, KC0[2].Z,
@@ -260,65 +260,63 @@ define amdgpu_kernel void @s_exp10_f32(ptr addrspace(1) %out, float %in) {
;
; CM-LABEL: s_exp10_f32:
; CM: ; %bb.0:
-; CM-NEXT: ALU 64, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: ALU 62, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: ALU clause starting at 4:
; CM-NEXT: AND_INT * T0.W, KC0[2].Z, literal.x,
; CM-NEXT: -4096(nan), 0(0.000000e+00)
-; CM-NEXT: MUL_IEEE T0.Z, PV.W, literal.x,
; CM-NEXT: ADD * T1.W, KC0[2].Z, -PV.W,
-; CM-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00)
-; CM-NEXT: MUL_IEEE T1.Z, PV.W, literal.x,
-; CM-NEXT: RNDNE * T2.W, PV.Z,
-; CM-NEXT: 975668412(6.390323e-04), 0(0.000000e+00)
-; CM-NEXT: TRUNC T2.Z, PV.W,
+; CM-NEXT: MUL_IEEE T0.Z, PV.W, literal.x,
+; CM-NEXT: MUL_IEEE * T2.W, T0.W, literal.y,
+; CM-NEXT: 975668412(6.390323e-04), 1079283712(3.321289e+00)
+; CM-NEXT: RNDNE T1.Z, PV.W,
; CM-NEXT: MULADD_IEEE * T1.W, T1.W, literal.x, PV.Z,
; CM-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00)
-; CM-NEXT: MULADD_IEEE T0.Y, T0.W, literal.x, PV.W,
-; CM-NEXT: ADD T0.Z, T0.Z, -T2.W,
-; CM-NEXT: FLT_TO_INT * T0.W, PV.Z,
+; CM-NEXT: MULADD_IEEE T0.Z, T0.W, literal.x, PV.W,
+; CM-NEXT: ADD * T0.W, T2.W, -PV.Z, BS:VEC_120/SCL_212
; CM-NEXT: 975668412(6.390323e-04), 0(0.000000e+00)
-; CM-NEXT: MIN_INT T1.Z, PV.W, literal.x,
-; CM-NEXT: ADD * T1.W, PV.Z, PV.Y,
+; CM-NEXT: TRUNC T1.Z, T1.Z,
+; CM-NEXT: ADD * T0.W, PV.W, PV.Z,
+; CM-NEXT: EXP_IEEE T0.X, T0.W,
+; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
+; CM-NEXT: FLT_TO_INT T0.Z, T1.Z,
+; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.x,
+; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
+; CM-NEXT: MUL_IEEE T0.Y, PV.W, literal.x,
+; CM-NEXT: MAX_INT T1.Z, PV.Z, literal.y,
+; CM-NEXT: MIN_INT * T1.W, PV.Z, literal.z,
+; CM-NEXT: 209715200(1.972152e-31), -330(nan)
; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00)
-; CM-NEXT: EXP_IEEE T0.X, T1.W,
-; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W,
-; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W,
-; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W,
-; CM-NEXT: MUL_IEEE T0.Y, PV.X, literal.x,
-; CM-NEXT: ADD_INT T0.Z, T1.Z, literal.y,
-; CM-NEXT: MAX_INT * T1.W, T0.W, literal.z,
-; CM-NEXT: 2130706432(1.701412e+38), -254(nan)
-; CM-NEXT: -330(nan), 0(0.000000e+00)
-; CM-NEXT: ADD_INT T1.X, T0.W, literal.x,
-; CM-NEXT: ADD_INT T1.Y, PV.W, literal.y,
-; CM-NEXT: ADD_INT T1.Z, T0.W, literal.z,
-; CM-NEXT: SETGT_UINT * T1.W, T0.W, literal.w,
-; CM-NEXT: -127(nan), 204(2.858649e-43)
+; CM-NEXT: ADD_INT T1.X, PV.W, literal.x,
+; CM-NEXT: ADD_INT T1.Y, PV.Z, literal.y,
+; CM-NEXT: ADD_INT T1.Z, T0.Z, literal.z,
+; CM-NEXT: SETGT_UINT * T1.W, T0.Z, literal.w,
+; CM-NEXT: -254(nan), 204(2.858649e-43)
; CM-NEXT: 102(1.429324e-43), -229(nan)
-; CM-NEXT: SETGT_UINT T2.X, T0.W, literal.x,
-; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
-; CM-NEXT: SETGT_INT T1.Z, T0.W, literal.y,
-; CM-NEXT: MUL_IEEE * T2.W, T0.X, literal.z,
-; CM-NEXT: 254(3.559298e-43), -127(nan)
-; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
-; CM-NEXT: MUL_IEEE T3.X, PV.W, literal.x,
-; CM-NEXT: CNDE_INT T1.Y, PV.Z, PV.Y, T0.W,
-; CM-NEXT: CNDE_INT T0.Z, PV.X, T1.X, T0.Z,
-; CM-NEXT: SETGT_INT * T0.W, T0.W, literal.y,
-; CM-NEXT: 209715200(1.972152e-31), 127(1.779649e-43)
+; CM-NEXT: ADD_INT T2.X, T0.Z, literal.x,
+; CM-NEXT: SETGT_UINT T2.Y, T0.Z, literal.y,
+; CM-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
+; CM-NEXT: SETGT_INT * T2.W, T0.Z, literal.x,
+; CM-NEXT: -127(nan), 254(3.559298e-43)
+; CM-NEXT: MUL_IEEE T3.X, T0.X, literal.x,
+; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Z, T0.Z,
+; CM-NEXT: CNDE_INT T1.Z, PV.Y, PV.X, T1.X,
+; CM-NEXT: SETGT_INT * T3.W, T0.Z, literal.y,
+; CM-NEXT: 2130706432(1.701412e+38), 127(1.779649e-43)
; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
-; CM-NEXT: CNDE_INT T0.Z, T1.W, PV.X, T2.W,
-; CM-NEXT: MUL_IEEE * T1.W, T0.Y, literal.x,
+; CM-NEXT: MUL_IEEE T0.Z, PV.X, literal.x,
+; CM-NEXT: CNDE_INT * T0.W, T1.W, T0.Y, T0.W,
; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
-; CM-NEXT: CNDE_INT T0.Y, T2.X, T0.Y, PV.W,
-; CM-NEXT: CNDE_INT T0.Z, T1.Z, PV.Z, T0.X,
-; CM-NEXT: LSHL * T1.W, PV.Y, literal.x,
+; CM-NEXT: CNDE_INT T0.Y, T2.W, PV.W, T0.X,
+; CM-NEXT: CNDE_INT T0.Z, T2.Y, T3.X, PV.Z,
+; CM-NEXT: LSHL * T0.W, PV.Y, literal.x,
; CM-NEXT: 23(3.222986e-44), 0(0.000000e+00)
; CM-NEXT: ADD_INT T1.Z, PV.W, literal.x,
-; CM-NEXT: CNDE_INT * T0.W, T0.W, PV.Z, PV.Y,
+; CM-NEXT: CNDE_INT * T0.W, T3.W, PV.Y, PV.Z,
; CM-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00)
; CM-NEXT: MUL_IEEE T0.Z, PV.W, PV.Z,
; CM-NEXT: SETGT * T0.W, literal.x, KC0[2].Z,
@@ -612,105 +610,105 @@ define amdgpu_kernel void @s_exp10_v2f32(ptr addrspace(1) %out, <2 x float> %in)
; R600-NEXT: AND_INT * T0.W, KC0[3].X, literal.x,
; R600-NEXT: -4096(nan), 0(0.000000e+00)
; R600-NEXT: ADD * T1.W, KC0[3].X, -PV.W,
-; R600-NEXT: AND_INT T0.Z, KC0[2].W, literal.x,
-; R600-NEXT: MUL_IEEE T2.W, PV.W, literal.y,
-; R600-NEXT: MUL_IEEE * T3.W, T0.W, literal.z,
-; R600-NEXT: -4096(nan), 975668412(6.390323e-04)
-; R600-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00)
-; R600-NEXT: RNDNE T1.Z, PS,
+; R600-NEXT: MUL_IEEE T2.W, PV.W, literal.x,
+; R600-NEXT: MUL_IEEE * T3.W, T0.W, literal.y,
+; R600-NEXT: 975668412(6.390323e-04), 1079283712(3.321289e+00)
+; R600-NEXT: RNDNE T0.Z, PS,
; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PV.W,
-; R600-NEXT: ADD * T2.W, KC0[2].W, -PV.Z,
-; R600-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T0.Y, PS, literal.x,
-; R600-NEXT: MUL_IEEE T2.Z, T0.Z, literal.y,
+; R600-NEXT: AND_INT * T2.W, KC0[2].W, literal.y,
+; R600-NEXT: 1079283712(3.321289e+00), -4096(nan)
+; R600-NEXT: ADD T1.Z, KC0[2].W, -PS,
; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.x, PV.W,
; R600-NEXT: ADD * T1.W, T3.W, -PV.Z,
+; R600-NEXT: 975668412(6.390323e-04), 0(0.000000e+00)
+; R600-NEXT: ADD T2.Z, PS, PV.W,
+; R600-NEXT: MUL_IEEE T0.W, PV.Z, literal.x,
+; R600-NEXT: MUL_IEEE * T1.W, T2.W, literal.y,
; R600-NEXT: 975668412(6.390323e-04), 1079283712(3.321289e+00)
-; R600-NEXT: ADD T3.Z, PS, PV.W,
-; R600-NEXT: RNDNE T0.W, PV.Z,
-; R600-NEXT: MULADD_IEEE * T1.W, T2.W, literal.x, PV.Y, BS:VEC_021/SCL_122
-; R600-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00)
-; R600-NEXT: TRUNC T0.Y, T1.Z,
-; R600-NEXT: MULADD_IEEE T0.Z, T0.Z, literal.x, PS, BS:VEC_120/SCL_212
-; R600-NEXT: ADD T1.W, T2.Z, -PV.W, BS:VEC_201
+; R600-NEXT: RNDNE T0.Y, PS,
+; R600-NEXT: MULADD_IEEE T1.Z, T1.Z, literal.x, PV.W,
+; R600-NEXT: TRUNC T0.W, T0.Z, BS:VEC_120/SCL_212
; R600-NEXT: EXP_IEEE * T0.X, PV.Z,
-; R600-NEXT: 975668412(6.390323e-04), 0(0.000000e+00)
-; R600-NEXT: ADD T0.Z, PV.W, PV.Z,
-; R600-NEXT: FLT_TO_INT T1.W, PV.Y,
-; R600-NEXT: MUL_IEEE * T2.W, PS, literal.x,
-; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T1.Z, PS, literal.x,
-; R600-NEXT: SETGT_UINT T3.W, PV.W, literal.y,
-; R600-NEXT: EXP_IEEE * T0.Y, PV.Z,
-; R600-NEXT: 2130706432(1.701412e+38), 254(3.559298e-43)
-; R600-NEXT: CNDE_INT T1.X, PV.W, T2.W, PV.Z,
-; R600-NEXT: MUL_IEEE T1.Y, PS, literal.x,
-; R600-NEXT: MAX_INT T0.Z, T1.W, literal.y,
-; R600-NEXT: MIN_INT T2.W, T1.W, literal.z,
-; R600-NEXT: TRUNC * T0.W, T0.W,
+; R600-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00)
+; R600-NEXT: FLT_TO_INT T1.Y, PV.W,
+; R600-NEXT: MUL_IEEE T0.Z, PS, literal.x,
+; R600-NEXT: MULADD_IEEE T0.W, T2.W, literal.y, PV.Z,
+; R600-NEXT: ADD * T1.W, T1.W, -PV.Y,
+; R600-NEXT: 209715200(1.972152e-31), 975668412(6.390323e-04)
+; R600-NEXT: ADD T1.Z, PS, PV.W,
+; R600-NEXT: MUL_IEEE T0.W, PV.Z, literal.x,
+; R600-NEXT: SETGT_UINT * T1.W, PV.Y, literal.y,
+; R600-NEXT: 209715200(1.972152e-31), -229(nan)
+; R600-NEXT: CNDE_INT T0.Z, PS, PV.W, T0.Z,
+; R600-NEXT: SETGT_INT T0.W, T1.Y, literal.x,
+; R600-NEXT: EXP_IEEE * T1.X, PV.Z,
+; R600-NEXT: -127(nan), 0(0.000000e+00)
+; R600-NEXT: CNDE_INT T0.Z, PV.W, PV.Z, T0.X,
+; R600-NEXT: MAX_INT T2.W, T1.Y, literal.x,
+; R600-NEXT: MUL_IEEE * T3.W, PS, literal.y,
+; R600-NEXT: -330(nan), 209715200(1.972152e-31)
+; R600-NEXT: MUL_IEEE T2.X, PS, literal.x,
+; R600-NEXT: ADD_INT T2.Y, PV.W, literal.y,
+; R600-NEXT: ADD_INT T1.Z, T1.Y, literal.z,
+; R600-NEXT: MIN_INT T2.W, T1.Y, literal.w,
+; R600-NEXT: TRUNC * T4.W, T0.Y,
+; R600-NEXT: 209715200(1.972152e-31), 204(2.858649e-43)
+; R600-NEXT: 102(1.429324e-43), 381(5.338947e-43)
+; R600-NEXT: FLT_TO_INT T3.X, PS,
+; R600-NEXT: ADD_INT T0.Y, PV.W, literal.x,
+; R600-NEXT: ADD_INT T2.Z, T1.Y, literal.y,
+; R600-NEXT: SETGT_UINT T2.W, T1.Y, literal.z,
+; R600-NEXT: CNDE_INT * T1.W, T1.W, PV.Y, PV.Z,
+; R600-NEXT: -254(nan), -127(nan)
+; R600-NEXT: 254(3.559298e-43), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T4.X, T1.X, literal.x,
+; R600-NEXT: MUL_IEEE T2.Y, T0.X, literal.x, BS:VEC_120/SCL_212
+; R600-NEXT: CNDE_INT T1.Z, T0.W, PS, T1.Y,
+; R600-NEXT: CNDE_INT T0.W, PV.W, PV.Z, PV.Y,
+; R600-NEXT: MAX_INT * T1.W, PV.X, literal.y,
; R600-NEXT: 2130706432(1.701412e+38), -330(nan)
-; R600-NEXT: 381(5.338947e-43), 0(0.000000e+00)
-; R600-NEXT: FLT_TO_INT T2.X, PS,
-; R600-NEXT: ADD_INT T2.Y, PV.W, literal.x,
-; R600-NEXT: ADD_INT T0.Z, PV.Z, literal.y,
-; R600-NEXT: ADD_INT T0.W, T1.W, literal.z,
-; R600-NEXT: SETGT_UINT * T2.W, T1.W, literal.w,
-; R600-NEXT: -254(nan), 204(2.858649e-43)
-; R600-NEXT: 102(1.429324e-43), -229(nan)
-; R600-NEXT: ADD_INT T3.X, T1.W, literal.x,
-; R600-NEXT: CNDE_INT T3.Y, PS, PV.Z, PV.W,
-; R600-NEXT: SETGT_INT T0.Z, T1.W, literal.x,
-; R600-NEXT: MUL_IEEE T0.W, T0.X, literal.y,
-; R600-NEXT: MUL_IEEE * T4.W, T0.Y, literal.y,
-; R600-NEXT: -127(nan), 209715200(1.972152e-31)
-; R600-NEXT: MUL_IEEE T4.X, PS, literal.x,
-; R600-NEXT: MUL_IEEE T4.Y, PV.W, literal.x,
-; R600-NEXT: CNDE_INT T1.Z, PV.Z, PV.Y, T1.W,
-; R600-NEXT: CNDE_INT T3.W, T3.W, PV.X, T2.Y,
-; R600-NEXT: MAX_INT * T5.W, T2.X, literal.y,
-; R600-NEXT: 209715200(1.972152e-31), -330(nan)
-; R600-NEXT: SETGT_INT T3.X, T1.W, literal.x,
-; R600-NEXT: ADD_INT T2.Y, PS, literal.y,
-; R600-NEXT: ADD_INT T2.Z, T2.X, literal.z,
-; R600-NEXT: SETGT_UINT * T1.W, T2.X, literal.w,
+; R600-NEXT: SETGT_INT T0.X, T1.Y, literal.x,
+; R600-NEXT: ADD_INT T0.Y, PS, literal.y,
+; R600-NEXT: ADD_INT T2.Z, T3.X, literal.z,
+; R600-NEXT: SETGT_UINT * T1.W, T3.X, literal.w,
; R600-NEXT: 127(1.779649e-43), 204(2.858649e-43)
; R600-NEXT: 102(1.429324e-43), -229(nan)
-; R600-NEXT: MIN_INT * T5.W, T2.X, literal.x,
+; R600-NEXT: MIN_INT * T4.W, T3.X, literal.x,
; R600-NEXT: 381(5.338947e-43), 0(0.000000e+00)
; R600-NEXT: ADD_INT T5.X, PV.W, literal.x,
-; R600-NEXT: ADD_INT T3.Y, T2.X, literal.y,
-; R600-NEXT: SETGT_UINT T3.Z, T2.X, literal.z,
-; R600-NEXT: CNDE_INT T5.W, T1.W, T2.Y, T2.Z,
-; R600-NEXT: SETGT_INT * T6.W, T2.X, literal.y,
+; R600-NEXT: ADD_INT T1.Y, T3.X, literal.y,
+; R600-NEXT: SETGT_UINT T3.Z, T3.X, literal.z,
+; R600-NEXT: CNDE_INT T4.W, T1.W, T0.Y, T2.Z,
+; R600-NEXT: SETGT_INT * T5.W, T3.X, literal.y,
; R600-NEXT: -254(nan), -127(nan)
; R600-NEXT: 254(3.559298e-43), 0(0.000000e+00)
-; R600-NEXT: CNDE_INT T6.X, PS, PV.W, T2.X,
-; R600-NEXT: CNDE_INT T2.Y, PV.Z, PV.Y, PV.X,
-; R600-NEXT: SETGT_INT T2.Z, T2.X, literal.x, BS:VEC_120/SCL_212
-; R600-NEXT: CNDE_INT T3.W, T3.X, T1.Z, T3.W, BS:VEC_021/SCL_122
-; R600-NEXT: CNDE_INT * T0.W, T2.W, T4.Y, T0.W,
-; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00)
-; R600-NEXT: CNDE_INT T0.X, T0.Z, PS, T0.X,
-; R600-NEXT: LSHL T3.Y, PV.W, literal.x,
-; R600-NEXT: CNDE_INT T0.Z, PV.Z, PV.X, PV.Y,
-; R600-NEXT: CNDE_INT T0.W, T1.W, T4.X, T4.W,
-; R600-NEXT: MUL_IEEE * T1.W, T1.Y, literal.y,
+; R600-NEXT: CNDE_INT T6.X, PS, PV.W, T3.X,
+; R600-NEXT: CNDE_INT T0.Y, PV.Z, PV.Y, PV.X,
+; R600-NEXT: SETGT_INT T2.Z, T3.X, literal.x,
+; R600-NEXT: CNDE_INT T0.W, T0.X, T1.Z, T0.W, BS:VEC_120/SCL_212
+; R600-NEXT: MUL_IEEE * T4.W, T2.Y, literal.y,
+; R600-NEXT: 127(1.779649e-43), 2130706432(1.701412e+38)
+; R600-NEXT: CNDE_INT T3.X, T2.W, T2.Y, PS, BS:VEC_120/SCL_212
+; R600-NEXT: LSHL T1.Y, PV.W, literal.x,
+; R600-NEXT: CNDE_INT T1.Z, PV.Z, PV.X, PV.Y,
+; R600-NEXT: MUL_IEEE T0.W, T4.X, literal.y,
+; R600-NEXT: CNDE_INT * T1.W, T1.W, T2.X, T3.W,
; R600-NEXT: 23(3.222986e-44), 2130706432(1.701412e+38)
-; R600-NEXT: CNDE_INT T2.X, T3.Z, T1.Y, PS,
-; R600-NEXT: CNDE_INT T0.Y, T6.W, PV.W, T0.Y,
-; R600-NEXT: LSHL T0.Z, PV.Z, literal.x,
+; R600-NEXT: CNDE_INT T1.X, T5.W, PS, T1.X, BS:VEC_021/SCL_122
+; R600-NEXT: CNDE_INT T0.Y, T3.Z, T4.X, PV.W, BS:VEC_201
+; R600-NEXT: LSHL T1.Z, PV.Z, literal.x,
; R600-NEXT: ADD_INT T0.W, PV.Y, literal.y,
-; R600-NEXT: CNDE_INT * T1.W, T3.X, PV.X, T1.X,
+; R600-NEXT: CNDE_INT * T1.W, T0.X, T0.Z, PV.X,
; R600-NEXT: 23(3.222986e-44), 1065353216(1.000000e+00)
; R600-NEXT: MUL_IEEE T1.Y, PS, PV.W,
-; R600-NEXT: SETGT T1.Z, literal.x, KC0[3].X,
+; R600-NEXT: SETGT T0.Z, literal.x, KC0[3].X,
; R600-NEXT: ADD_INT * T0.W, PV.Z, literal.y,
; R600-NEXT: -1036817932(-4.485347e+01), 1065353216(1.000000e+00)
; R600-NEXT: ALU clause starting at 101:
-; R600-NEXT: CNDE_INT * T1.W, T2.Z, T0.Y, T2.X,
+; R600-NEXT: CNDE_INT * T1.W, T2.Z, T1.X, T0.Y,
; R600-NEXT: MUL_IEEE T0.Y, PV.W, T0.W,
-; R600-NEXT: SETGT T0.Z, literal.x, KC0[2].W,
-; R600-NEXT: CNDE T0.W, T1.Z, T1.Y, 0.0,
+; R600-NEXT: SETGT T1.Z, literal.x, KC0[2].W,
+; R600-NEXT: CNDE T0.W, T0.Z, T1.Y, 0.0,
; R600-NEXT: SETGT * T1.W, KC0[3].X, literal.y,
; R600-NEXT: -1036817932(-4.485347e+01), 1109008539(3.853184e+01)
; R600-NEXT: CNDE T1.Y, PS, PV.W, literal.x,
@@ -723,118 +721,116 @@ define amdgpu_kernel void @s_exp10_v2f32(ptr addrspace(1) %out, <2 x float> %in)
;
; CM-LABEL: s_exp10_v2f32:
; CM: ; %bb.0:
-; CM-NEXT: ALU 100, @4, KC0[CB0:0-32], KC1[]
-; CM-NEXT: ALU 18, @105, KC0[CB0:0-32], KC1[]
+; CM-NEXT: ALU 98, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: ALU 18, @103, KC0[CB0:0-32], KC1[]
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
; CM-NEXT: CF_END
; CM-NEXT: ALU clause starting at 4:
; CM-NEXT: AND_INT * T0.W, KC0[2].W, literal.x,
; CM-NEXT: -4096(nan), 0(0.000000e+00)
-; CM-NEXT: MUL_IEEE T0.Z, PV.W, literal.x,
; CM-NEXT: ADD * T1.W, KC0[2].W, -PV.W,
+; CM-NEXT: MUL_IEEE T0.Y, PV.W, literal.x,
+; CM-NEXT: MUL_IEEE T0.Z, T0.W, literal.y,
+; CM-NEXT: AND_INT * T2.W, KC0[3].X, literal.z,
+; CM-NEXT: 975668412(6.390323e-04), 1079283712(3.321289e+00)
+; CM-NEXT: -4096(nan), 0(0.000000e+00)
+; CM-NEXT: ADD T1.Y, KC0[3].X, -PV.W,
+; CM-NEXT: RNDNE T1.Z, PV.Z,
+; CM-NEXT: MULADD_IEEE * T1.W, T1.W, literal.x, PV.Y,
; CM-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00)
-; CM-NEXT: MUL_IEEE T1.Z, PV.W, literal.x,
-; CM-NEXT: RNDNE * T2.W, PV.Z,
-; CM-NEXT: 975668412(6.390323e-04), 0(0.000000e+00)
-; CM-NEXT: TRUNC T0.Y, PV.W,
-; CM-NEXT: AND_INT T2.Z, KC0[3].X, literal.x,
-; CM-NEXT: MULADD_IEEE * T1.W, T1.W, literal.y, PV.Z,
-; CM-NEXT: -4096(nan), 1079283712(3.321289e+00)
; CM-NEXT: MULADD_IEEE T0.X, T0.W, literal.x, PV.W,
-; CM-NEXT: MUL_IEEE T1.Y, PV.Z, literal.y,
-; CM-NEXT: FLT_TO_INT T1.Z, PV.Y,
-; CM-NEXT: ADD * T0.W, KC0[3].X, -PV.Z,
+; CM-NEXT: ADD T0.Y, T0.Z, -PV.Z,
+; CM-NEXT: MUL_IEEE T0.Z, PV.Y, literal.x,
+; CM-NEXT: MUL_IEEE * T0.W, T2.W, literal.y, BS:VEC_120/SCL_212
; CM-NEXT: 975668412(6.390323e-04), 1079283712(3.321289e+00)
-; CM-NEXT: ADD T1.X, T0.Z, -T2.W,
-; CM-NEXT: MUL_IEEE T0.Y, PV.W, literal.x,
-; CM-NEXT: MAX_INT T0.Z, PV.Z, literal.y,
-; CM-NEXT: RNDNE * T1.W, PV.Y,
-; CM-NEXT: 975668412(6.390323e-04), -330(nan)
-; CM-NEXT: TRUNC T2.X, PV.W,
-; CM-NEXT: ADD_INT T2.Y, PV.Z, literal.x,
-; CM-NEXT: MULADD_IEEE T0.Z, T0.W, literal.y, PV.Y,
-; CM-NEXT: ADD * T0.W, PV.X, T0.X,
-; CM-NEXT: 204(2.858649e-43), 1079283712(3.321289e+00)
-; CM-NEXT: EXP_IEEE T0.X, T0.W,
-; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
-; CM-NEXT: ADD_INT T1.X, T1.Z, literal.x,
-; CM-NEXT: MULADD_IEEE T0.Y, T2.Z, literal.y, T0.Z, BS:VEC_102/SCL_221
-; CM-NEXT: ADD T0.Z, T1.Y, -T1.W,
-; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.z,
-; CM-NEXT: 102(1.429324e-43), 975668412(6.390323e-04)
-; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
-; CM-NEXT: SETGT_UINT T3.X, T1.Z, literal.x,
-; CM-NEXT: MUL_IEEE T1.Y, PV.W, literal.y,
-; CM-NEXT: SETGT_UINT T2.Z, T1.Z, literal.z,
-; CM-NEXT: ADD * T1.W, PV.Z, PV.Y,
-; CM-NEXT: -229(nan), 2130706432(1.701412e+38)
-; CM-NEXT: 254(3.559298e-43), 0(0.000000e+00)
+; CM-NEXT: TRUNC T1.X, T1.Z,
+; CM-NEXT: RNDNE T2.Y, PV.W,
+; CM-NEXT: MULADD_IEEE T0.Z, T1.Y, literal.x, PV.Z,
+; CM-NEXT: ADD * T1.W, PV.Y, PV.X,
+; CM-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00)
+; CM-NEXT: EXP_IEEE T0.X, T1.W,
+; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W,
+; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W,
+; CM-NEXT: MULADD_IEEE T2.X, T2.W, literal.x, T0.Z,
+; CM-NEXT: ADD T0.Y, T0.W, -T2.Y, BS:VEC_120/SCL_212
+; CM-NEXT: FLT_TO_INT T0.Z, T1.X,
+; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.y,
+; CM-NEXT: 975668412(6.390323e-04), 209715200(1.972152e-31)
+; CM-NEXT: MUL_IEEE T1.X, PV.W, literal.x,
+; CM-NEXT: SETGT_UINT T1.Y, PV.Z, literal.y,
+; CM-NEXT: TRUNC T1.Z, T2.Y,
+; CM-NEXT: ADD * T1.W, PV.Y, PV.X,
+; CM-NEXT: 209715200(1.972152e-31), -229(nan)
; CM-NEXT: EXP_IEEE T0.X (MASKED), T1.W,
; CM-NEXT: EXP_IEEE T0.Y, T1.W,
; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W,
; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W,
-; CM-NEXT: CNDE_INT T4.X, T2.Z, T0.W, T1.Y,
-; CM-NEXT: CNDE_INT T1.Y, T3.X, T2.Y, T1.X,
-; CM-NEXT: FLT_TO_INT T0.Z, T2.X, BS:VEC_120/SCL_212
-; CM-NEXT: MUL_IEEE * T0.W, PV.Y, literal.x,
-; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
-; CM-NEXT: SETGT_INT T1.X, T1.Z, literal.x,
-; CM-NEXT: MUL_IEEE T2.Y, T0.X, literal.y,
-; CM-NEXT: MUL_IEEE T3.Z, PV.W, literal.z,
-; CM-NEXT: SETGT_UINT * T1.W, PV.Z, literal.w,
-; CM-NEXT: -127(nan), 209715200(1.972152e-31)
-; CM-NEXT: 2130706432(1.701412e+38), 254(3.559298e-43)
-; CM-NEXT: CNDE_INT T2.X, PV.W, T0.W, PV.Z,
+; CM-NEXT: FLT_TO_INT T2.X, T1.Z,
+; CM-NEXT: MUL_IEEE T2.Y, PV.Y, literal.x,
+; CM-NEXT: CNDE_INT T1.Z, T1.Y, T1.X, T0.W,
+; CM-NEXT: SETGT_INT * T0.W, T0.Z, literal.y, BS:VEC_120/SCL_212
+; CM-NEXT: 209715200(1.972152e-31), -127(nan)
+; CM-NEXT: CNDE_INT T1.X, PV.W, PV.Z, T0.X,
; CM-NEXT: MUL_IEEE T3.Y, PV.Y, literal.x,
-; CM-NEXT: CNDE_INT T3.Z, PV.X, T1.Y, T1.Z,
-; CM-NEXT: MAX_INT * T0.W, T0.Z, literal.y,
-; CM-NEXT: 209715200(1.972152e-31), -330(nan)
-; CM-NEXT: ADD_INT T5.X, PV.W, literal.x,
-; CM-NEXT: ADD_INT T1.Y, T0.Z, literal.y,
-; CM-NEXT: SETGT_UINT T4.Z, T0.Z, literal.z,
-; CM-NEXT: MUL_IEEE * T0.W, T0.Y, literal.w,
+; CM-NEXT: SETGT_UINT T1.Z, PV.X, literal.y,
+; CM-NEXT: MAX_INT * T1.W, T0.Z, literal.z,
+; CM-NEXT: 209715200(1.972152e-31), -229(nan)
+; CM-NEXT: -330(nan), 0(0.000000e+00)
+; CM-NEXT: ADD_INT T3.X, PV.W, literal.x,
+; CM-NEXT: ADD_INT T4.Y, T0.Z, literal.y,
+; CM-NEXT: CNDE_INT T2.Z, PV.Z, PV.Y, T2.Y,
+; CM-NEXT: SETGT_INT * T1.W, T2.X, literal.z,
; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43)
-; CM-NEXT: -229(nan), 209715200(1.972152e-31)
-; CM-NEXT: MUL_IEEE T6.X, PV.W, literal.x,
-; CM-NEXT: MIN_INT T4.Y, T0.Z, literal.y,
-; CM-NEXT: CNDE_INT T5.Z, PV.Z, PV.X, PV.Y,
-; CM-NEXT: SETGT_INT * T2.W, T0.Z, literal.z,
-; CM-NEXT: 209715200(1.972152e-31), 381(5.338947e-43)
-; CM-NEXT: -127(nan), 0(0.000000e+00)
-; CM-NEXT: CNDE_INT T5.X, PV.W, PV.Z, T0.Z,
-; CM-NEXT: MIN_INT T1.Y, T1.Z, literal.x,
-; CM-NEXT: ADD_INT T5.Z, PV.Y, literal.y,
-; CM-NEXT: ADD_INT * T3.W, T0.Z, literal.z, BS:VEC_120/SCL_212
-; CM-NEXT: 381(5.338947e-43), -254(nan)
; CM-NEXT: -127(nan), 0(0.000000e+00)
-; CM-NEXT: CNDE_INT T7.X, T1.W, PV.W, PV.Z,
-; CM-NEXT: SETGT_INT T4.Y, T0.Z, literal.x,
-; CM-NEXT: ADD_INT T0.Z, PV.Y, literal.y,
-; CM-NEXT: ADD_INT * T1.W, T1.Z, literal.z, BS:VEC_120/SCL_212
+; CM-NEXT: CNDE_INT T4.X, PV.W, PV.Z, T0.Y,
+; CM-NEXT: MUL_IEEE T2.Y, T0.X, literal.x,
+; CM-NEXT: MAX_INT T2.Z, T2.X, literal.y, BS:VEC_120/SCL_212
+; CM-NEXT: CNDE_INT * T2.W, T1.Y, PV.X, PV.Y,
+; CM-NEXT: 2130706432(1.701412e+38), -330(nan)
+; CM-NEXT: CNDE_INT T0.X, T0.W, PV.W, T0.Z,
+; CM-NEXT: ADD_INT T1.Y, PV.Z, literal.x,
+; CM-NEXT: ADD_INT T2.Z, T2.X, literal.y,
+; CM-NEXT: MIN_INT * T0.W, T2.X, literal.z,
+; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43)
+; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00)
+; CM-NEXT: ADD_INT T3.X, PV.W, literal.x,
+; CM-NEXT: ADD_INT T3.Y, T2.X, literal.y,
+; CM-NEXT: SETGT_UINT T3.Z, T2.X, literal.z,
+; CM-NEXT: CNDE_INT * T0.W, T1.Z, PV.Y, PV.Z,
+; CM-NEXT: -254(nan), -127(nan)
+; CM-NEXT: 254(3.559298e-43), 0(0.000000e+00)
+; CM-NEXT: MUL_IEEE T5.X, T0.Y, literal.x,
+; CM-NEXT: CNDE_INT T0.Y, T1.W, PV.W, T2.X,
+; CM-NEXT: CNDE_INT T1.Z, PV.Z, PV.Y, PV.X,
+; CM-NEXT: MIN_INT * T0.W, T0.Z, literal.y,
+; CM-NEXT: 2130706432(1.701412e+38), 381(5.338947e-43)
+; CM-NEXT: SETGT_INT T2.X, T2.X, literal.x,
+; CM-NEXT: ADD_INT T1.Y, PV.W, literal.y,
+; CM-NEXT: ADD_INT T2.Z, T0.Z, literal.z,
+; CM-NEXT: SETGT_UINT * T0.W, T0.Z, literal.w,
; CM-NEXT: 127(1.779649e-43), -254(nan)
-; CM-NEXT: -127(nan), 0(0.000000e+00)
-; CM-NEXT: CNDE_INT T8.X, T2.Z, PV.W, PV.Z,
-; CM-NEXT: SETGT_INT T1.Y, T1.Z, literal.x, BS:VEC_120/SCL_212
-; CM-NEXT: CNDE_INT T0.Z, PV.Y, T5.X, PV.X,
-; CM-NEXT: CNDE_INT * T0.W, T4.Z, T6.X, T0.W, BS:VEC_201
-; CM-NEXT: 127(1.779649e-43), 0(0.000000e+00)
-; CM-NEXT: CNDE_INT T5.X, T2.W, PV.W, T0.Y,
+; CM-NEXT: -127(nan), 254(3.559298e-43)
+; CM-NEXT: CNDE_INT T3.X, PV.W, PV.Z, PV.Y,
+; CM-NEXT: SETGT_INT T1.Y, T0.Z, literal.x,
+; CM-NEXT: CNDE_INT T0.Z, PV.X, T0.Y, T1.Z,
+; CM-NEXT: MUL_IEEE * T1.W, T5.X, literal.y,
+; CM-NEXT: 127(1.779649e-43), 2130706432(1.701412e+38)
+; CM-NEXT: CNDE_INT T5.X, T3.Z, T5.X, PV.W,
; CM-NEXT: LSHL T0.Y, PV.Z, literal.x,
-; CM-NEXT: CNDE_INT T0.Z, PV.Y, T3.Z, PV.X,
-; CM-NEXT: CNDE_INT * T0.W, T3.X, T3.Y, T2.Y, BS:VEC_201
-; CM-NEXT: 23(3.222986e-44), 0(0.000000e+00)
-; CM-NEXT: CNDE_INT T0.X, T1.X, PV.W, T0.X,
+; CM-NEXT: CNDE_INT T0.Z, PV.Y, T0.X, PV.X, BS:VEC_021/SCL_122
+; CM-NEXT: MUL_IEEE * T1.W, T2.Y, literal.y,
+; CM-NEXT: 23(3.222986e-44), 2130706432(1.701412e+38)
+; CM-NEXT: CNDE_INT T0.X, T0.W, T2.Y, PV.W,
; CM-NEXT: LSHL T2.Y, PV.Z, literal.x,
; CM-NEXT: ADD_INT * T0.Z, PV.Y, literal.y,
; CM-NEXT: 23(3.222986e-44), 1065353216(1.000000e+00)
-; CM-NEXT: ALU clause starting at 105:
-; CM-NEXT: CNDE_INT * T0.W, T4.Y, T5.X, T2.X,
-; CM-NEXT: MUL_IEEE T1.X, PV.W, T0.Z,
+; CM-NEXT: ALU clause starting at 103:
+; CM-NEXT: CNDE_INT * T0.W, T2.X, T4.X, T5.X,
+; CM-NEXT: MUL_IEEE T2.X, PV.W, T0.Z,
; CM-NEXT: SETGT T0.Y, literal.x, KC0[3].X,
; CM-NEXT: ADD_INT T0.Z, T2.Y, literal.y,
-; CM-NEXT: CNDE_INT * T0.W, T1.Y, T0.X, T4.X, BS:VEC_120/SCL_212
+; CM-NEXT: CNDE_INT * T0.W, T1.Y, T1.X, T0.X, BS:VEC_120/SCL_212
; CM-NEXT: -1036817932(-4.485347e+01), 1065353216(1.000000e+00)
; CM-NEXT: MUL_IEEE T0.X, PV.W, PV.Z,
; CM-NEXT: SETGT T1.Y, literal.x, KC0[2].W,
@@ -1217,8 +1213,8 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
;
; R600-LABEL: s_exp10_v3f32:
; R600: ; %bb.0:
-; R600-NEXT: ALU 100, @6, KC0[CB0:0-32], KC1[]
-; R600-NEXT: ALU 69, @107, KC0[CB0:0-32], KC1[]
+; R600-NEXT: ALU 99, @6, KC0[CB0:0-32], KC1[]
+; R600-NEXT: ALU 69, @106, KC0[CB0:0-32], KC1[]
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; R600-NEXT: CF_END
@@ -1226,69 +1222,68 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
; R600-NEXT: ALU clause starting at 6:
; R600-NEXT: AND_INT * T0.W, KC0[3].Y, literal.x,
; R600-NEXT: -4096(nan), 0(0.000000e+00)
-; R600-NEXT: ADD T1.W, KC0[3].Y, -PV.W,
-; R600-NEXT: MUL_IEEE * T2.W, PV.W, literal.x,
+; R600-NEXT: MUL_IEEE T1.W, PV.W, literal.x,
+; R600-NEXT: ADD * T2.W, KC0[3].Y, -PV.W,
; R600-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00)
-; R600-NEXT: RNDNE T3.W, PS,
-; R600-NEXT: MUL_IEEE * T4.W, PV.W, literal.x,
+; R600-NEXT: RNDNE * T3.W, PV.W,
+; R600-NEXT: TRUNC T4.W, PV.W,
+; R600-NEXT: MUL_IEEE * T5.W, T2.W, literal.x,
; R600-NEXT: 975668412(6.390323e-04), 0(0.000000e+00)
-; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PS,
-; R600-NEXT: TRUNC * T4.W, PV.W,
+; R600-NEXT: MULADD_IEEE T2.W, T2.W, literal.x, PS,
+; R600-NEXT: FLT_TO_INT * T4.W, PV.W,
; R600-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00)
-; R600-NEXT: FLT_TO_INT T0.Z, PS,
-; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.x, PV.W,
-; R600-NEXT: ADD * T1.W, T2.W, -T3.W,
-; R600-NEXT: 975668412(6.390323e-04), 0(0.000000e+00)
-; R600-NEXT: ADD T0.W, PS, PV.W,
-; R600-NEXT: MAX_INT * T1.W, PV.Z, literal.x,
-; R600-NEXT: -330(nan), 0(0.000000e+00)
-; R600-NEXT: ADD_INT T0.Y, PS, literal.x,
-; R600-NEXT: ADD_INT T1.Z, T0.Z, literal.y,
-; R600-NEXT: SETGT_UINT T1.W, T0.Z, literal.z,
-; R600-NEXT: EXP_IEEE * T0.X, PV.W,
+; R600-NEXT: MAX_INT T0.Z, PS, literal.x,
+; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.y, PV.W,
+; R600-NEXT: ADD * T1.W, T1.W, -T3.W,
+; R600-NEXT: -330(nan), 975668412(6.390323e-04)
+; R600-NEXT: ADD T0.Y, PS, PV.W,
+; R600-NEXT: ADD_INT T0.Z, PV.Z, literal.x,
+; R600-NEXT: ADD_INT T0.W, T4.W, literal.y,
+; R600-NEXT: SETGT_UINT * T1.W, T4.W, literal.z,
; R600-NEXT: 204(2.858649e-43), 102(1.429324e-43)
; R600-NEXT: -229(nan), 0(0.000000e+00)
-; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
-; R600-NEXT: SETGT_INT T0.W, T0.Z, literal.x,
-; R600-NEXT: MUL_IEEE * T2.W, PS, literal.y,
-; R600-NEXT: -127(nan), 209715200(1.972152e-31)
-; R600-NEXT: MUL_IEEE T0.Y, PS, literal.x,
-; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Z, T0.Z,
-; R600-NEXT: MIN_INT T3.W, T0.Z, literal.y,
-; R600-NEXT: AND_INT * T4.W, KC0[3].W, literal.z,
-; R600-NEXT: 209715200(1.972152e-31), 381(5.338947e-43)
-; R600-NEXT: -4096(nan), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T1.X, T0.X, literal.x,
-; R600-NEXT: ADD T1.Y, KC0[3].W, -PS,
-; R600-NEXT: ADD_INT T2.Z, PV.W, literal.y,
-; R600-NEXT: ADD_INT T3.W, T0.Z, literal.z,
-; R600-NEXT: SETGT_UINT * T5.W, T0.Z, literal.w,
-; R600-NEXT: 2130706432(1.701412e+38), -254(nan)
+; R600-NEXT: CNDE_INT T0.Z, PS, PV.Z, PV.W,
+; R600-NEXT: SETGT_INT T0.W, T4.W, literal.x,
+; R600-NEXT: EXP_IEEE * T0.X, PV.Y,
+; R600-NEXT: -127(nan), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T1.X, PS, literal.x,
+; R600-NEXT: CNDE_INT T0.Y, PV.W, PV.Z, T4.W,
+; R600-NEXT: MIN_INT T0.Z, T4.W, literal.y,
+; R600-NEXT: AND_INT T2.W, KC0[3].W, literal.z,
+; R600-NEXT: MUL_IEEE * T3.W, PS, literal.w,
+; R600-NEXT: 2130706432(1.701412e+38), 381(5.338947e-43)
+; R600-NEXT: -4096(nan), 209715200(1.972152e-31)
+; R600-NEXT: MUL_IEEE T2.X, PS, literal.x,
+; R600-NEXT: ADD T1.Y, KC0[3].W, -PV.W,
+; R600-NEXT: ADD_INT T0.Z, PV.Z, literal.y,
+; R600-NEXT: ADD_INT T5.W, T4.W, literal.z,
+; R600-NEXT: SETGT_UINT * T6.W, T4.W, literal.w,
+; R600-NEXT: 209715200(1.972152e-31), -254(nan)
; R600-NEXT: -127(nan), 254(3.559298e-43)
-; R600-NEXT: CNDE_INT T2.X, PS, PV.W, PV.Z,
-; R600-NEXT: SETGT_INT T2.Y, T0.Z, literal.x,
+; R600-NEXT: CNDE_INT T3.X, PS, PV.W, PV.Z,
+; R600-NEXT: SETGT_INT T2.Y, T4.W, literal.x,
; R600-NEXT: MUL_IEEE T0.Z, PV.Y, literal.y,
-; R600-NEXT: MUL_IEEE T3.W, T4.W, literal.z,
-; R600-NEXT: MUL_IEEE * T6.W, PV.X, literal.w,
+; R600-NEXT: MUL_IEEE * T4.W, T2.W, literal.z, BS:VEC_120/SCL_212
; R600-NEXT: 127(1.779649e-43), 975668412(6.390323e-04)
-; R600-NEXT: 1079283712(3.321289e+00), 2130706432(1.701412e+38)
-; R600-NEXT: CNDE_INT T1.X, T5.W, T1.X, PS, BS:VEC_120/SCL_212
-; R600-NEXT: RNDNE T3.Y, PV.W,
-; R600-NEXT: MULADD_IEEE T0.Z, T1.Y, literal.x, PV.Z,
-; R600-NEXT: CNDE_INT T5.W, PV.Y, T1.Z, PV.X,
-; R600-NEXT: CNDE_INT * T1.W, T1.W, T0.Y, T2.W,
; R600-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00)
-; R600-NEXT: CNDE_INT T0.X, T0.W, PS, T0.X,
+; R600-NEXT: CNDE_INT * T1.W, T1.W, T2.X, T3.W,
+; R600-NEXT: CNDE_INT T0.X, T0.W, PV.W, T0.X, BS:VEC_021/SCL_122
+; R600-NEXT: RNDNE T3.Y, T4.W, BS:VEC_120/SCL_212
+; R600-NEXT: MULADD_IEEE T0.Z, T1.Y, literal.x, T0.Z,
+; R600-NEXT: CNDE_INT T0.W, T2.Y, T0.Y, T3.X, BS:VEC_120/SCL_212
+; R600-NEXT: MUL_IEEE * T1.W, T1.X, literal.y,
+; R600-NEXT: 1079283712(3.321289e+00), 2130706432(1.701412e+38)
+; R600-NEXT: CNDE_INT T1.X, T6.W, T1.X, PS,
; R600-NEXT: LSHL T0.Y, PV.W, literal.x,
; R600-NEXT: AND_INT T1.Z, KC0[3].Z, literal.y,
-; R600-NEXT: MULADD_IEEE T0.W, T4.W, literal.z, PV.Z, BS:VEC_120/SCL_212
-; R600-NEXT: ADD * T1.W, T3.W, -PV.Y,
+; R600-NEXT: MULADD_IEEE T0.W, T2.W, literal.z, PV.Z, BS:VEC_120/SCL_212
+; R600-NEXT: ADD * T1.W, T4.W, -PV.Y,
; R600-NEXT: 23(3.222986e-44), -4096(nan)
; R600-NEXT: 975668412(6.390323e-04), 0(0.000000e+00)
; R600-NEXT: ADD T1.Y, PS, PV.W,
; R600-NEXT: MUL_IEEE T0.Z, PV.Z, literal.x,
; R600-NEXT: ADD_INT T0.W, PV.Y, literal.y,
-; R600-NEXT: CNDE_INT * T1.W, T2.Y, PV.X, T1.X,
+; R600-NEXT: CNDE_INT * T1.W, T2.Y, T0.X, PV.X,
; R600-NEXT: 1079283712(3.321289e+00), 1065353216(1.000000e+00)
; R600-NEXT: MUL_IEEE T0.X, PS, PV.W,
; R600-NEXT: ADD T0.Y, KC0[3].Z, -T1.Z,
@@ -1302,12 +1297,12 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
; R600-NEXT: MUL_IEEE * T1.W, PS, literal.z,
; R600-NEXT: -1036817932(-4.485347e+01), 975668412(6.390323e-04)
; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T3.X, T1.X, literal.x,
-; R600-NEXT: MUL_IEEE T2.Y, PS, literal.y,
+; R600-NEXT: MUL_IEEE T3.X, PS, literal.x,
+; R600-NEXT: MUL_IEEE T2.Y, T1.X, literal.y,
; R600-NEXT: MULADD_IEEE T4.Z, T0.Y, literal.z, PV.W,
; R600-NEXT: FLT_TO_INT T0.W, PV.Z,
; R600-NEXT: MIN_INT * T2.W, PV.Y, literal.w,
-; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31)
+; R600-NEXT: 209715200(1.972152e-31), 2130706432(1.701412e+38)
; R600-NEXT: 1079283712(3.321289e+00), 381(5.338947e-43)
; R600-NEXT: ADD_INT T4.X, PS, literal.x,
; R600-NEXT: MAX_INT T0.Y, PV.W, literal.y,
@@ -1325,7 +1320,7 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
; R600-NEXT: 102(1.429324e-43), -229(nan)
; R600-NEXT: ADD_INT * T6.X, T0.W, literal.x,
; R600-NEXT: -127(nan), 0(0.000000e+00)
-; R600-NEXT: ALU clause starting at 107:
+; R600-NEXT: ALU clause starting at 106:
; R600-NEXT: SETGT_UINT T0.Y, T0.W, literal.x,
; R600-NEXT: CNDE_INT T0.Z, T3.W, T0.Z, T2.W, BS:VEC_102/SCL_221
; R600-NEXT: SETGT_INT T2.W, T0.W, literal.y,
@@ -1341,25 +1336,25 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
; R600-NEXT: SETGT_UINT T5.X, T1.Y, literal.x,
; R600-NEXT: CNDE_INT T4.Y, PS, PV.Z, PV.W,
; R600-NEXT: MAX_INT T0.Z, T1.Y, literal.y,
-; R600-NEXT: MUL_IEEE T4.W, T1.Z, literal.z,
-; R600-NEXT: MUL_IEEE * T5.W, PV.Y, literal.w,
+; R600-NEXT: MUL_IEEE T4.W, PV.Y, literal.z,
+; R600-NEXT: MUL_IEEE * T5.W, T1.Z, literal.w,
; R600-NEXT: 254(3.559298e-43), -330(nan)
-; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31)
-; R600-NEXT: CNDE_INT T6.X, T3.W, PS, T3.Y, BS:VEC_021/SCL_122
-; R600-NEXT: MUL_IEEE T3.Y, PV.W, literal.x,
+; R600-NEXT: 209715200(1.972152e-31), 2130706432(1.701412e+38)
+; R600-NEXT: MUL_IEEE T6.X, PS, literal.x,
+; R600-NEXT: CNDE_INT T3.Y, T3.W, PV.W, T3.Y, BS:VEC_021/SCL_122
; R600-NEXT: ADD_INT T0.Z, PV.Z, literal.y,
; R600-NEXT: ADD_INT T3.W, T1.Y, literal.z,
-; R600-NEXT: SETGT_UINT * T5.W, T1.Y, literal.w,
+; R600-NEXT: SETGT_UINT * T4.W, T1.Y, literal.w,
; R600-NEXT: 2130706432(1.701412e+38), 204(2.858649e-43)
; R600-NEXT: 102(1.429324e-43), -229(nan)
; R600-NEXT: CNDE_INT T8.X, PS, PV.Z, PV.W,
; R600-NEXT: SETGT_INT T5.Y, T1.Y, literal.x,
-; R600-NEXT: CNDE_INT T0.Z, T0.Y, T4.W, PV.Y, BS:VEC_120/SCL_212
-; R600-NEXT: CNDE_INT T2.W, T2.W, PV.X, T1.Z,
+; R600-NEXT: CNDE_INT T0.Z, T2.W, PV.Y, T1.Z,
+; R600-NEXT: CNDE_INT T2.W, T0.Y, T5.W, PV.X, BS:VEC_120/SCL_212
; R600-NEXT: LSHL * T3.W, T4.Y, literal.y,
; R600-NEXT: -127(nan), 23(3.222986e-44)
; R600-NEXT: ADD_INT T6.X, PS, literal.x,
-; R600-NEXT: CNDE_INT T0.Y, T0.W, PV.W, PV.Z,
+; R600-NEXT: CNDE_INT T0.Y, T0.W, PV.Z, PV.W,
; R600-NEXT: CNDE_INT T0.Z, PV.Y, PV.X, T1.Y,
; R600-NEXT: CNDE_INT T0.W, T5.X, T7.X, T4.X,
; R600-NEXT: SETGT_INT * T2.W, T1.Y, literal.y,
@@ -1367,18 +1362,18 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
; R600-NEXT: CNDE_INT T4.X, PS, PV.Z, PV.W,
; R600-NEXT: MUL_IEEE T0.Y, PV.Y, PV.X,
; R600-NEXT: SETGT T0.Z, literal.x, KC0[3].Z,
-; R600-NEXT: CNDE_INT T0.W, T5.W, T2.Y, T1.W,
-; R600-NEXT: MUL_IEEE * T1.W, T3.X, literal.y,
+; R600-NEXT: MUL_IEEE T0.W, T2.Y, literal.y,
+; R600-NEXT: CNDE_INT * T1.W, T4.W, T3.X, T1.W,
; R600-NEXT: -1036817932(-4.485347e+01), 2130706432(1.701412e+38)
-; R600-NEXT: CNDE_INT T3.X, T5.X, T3.X, PS,
-; R600-NEXT: CNDE_INT T1.Y, T5.Y, PV.W, T1.X,
+; R600-NEXT: CNDE_INT T1.X, T5.Y, PS, T1.X,
+; R600-NEXT: CNDE_INT T1.Y, T5.X, T2.Y, PV.W,
; R600-NEXT: CNDE T0.Z, PV.Z, PV.Y, 0.0,
; R600-NEXT: SETGT T0.W, KC0[3].Z, literal.x,
; R600-NEXT: LSHL * T1.W, PV.X, literal.y,
; R600-NEXT: 1109008539(3.853184e+01), 23(3.222986e-44)
-; R600-NEXT: ADD_INT T1.X, PS, literal.x,
+; R600-NEXT: ADD_INT T3.X, PS, literal.x,
; R600-NEXT: CNDE T0.Y, PV.W, PV.Z, literal.y,
-; R600-NEXT: CNDE_INT T0.Z, T2.W, PV.Y, PV.X,
+; R600-NEXT: CNDE_INT T0.Z, T2.W, PV.X, PV.Y,
; R600-NEXT: CNDE T0.W, T2.X, T0.X, 0.0,
; R600-NEXT: SETGT * T1.W, KC0[3].Y, literal.z,
; R600-NEXT: 1065353216(1.000000e+00), 2139095040(INF)
@@ -1399,197 +1394,193 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
;
; CM-LABEL: s_exp10_v3f32:
; CM: ; %bb.0:
-; CM-NEXT: ALU 102, @6, KC0[CB0:0-32], KC1[]
-; CM-NEXT: ALU 80, @109, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T3.X
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2.X, T0.X
+; CM-NEXT: ALU 101, @6, KC0[CB0:0-32], KC1[]
+; CM-NEXT: ALU 77, @108, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2.X, T3.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: ALU clause starting at 6:
; CM-NEXT: AND_INT * T0.W, KC0[3].Y, literal.x,
; CM-NEXT: -4096(nan), 0(0.000000e+00)
-; CM-NEXT: MUL_IEEE T0.Z, PV.W, literal.x,
; CM-NEXT: ADD * T1.W, KC0[3].Y, -PV.W,
-; CM-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00)
-; CM-NEXT: MUL_IEEE T1.Z, PV.W, literal.x,
-; CM-NEXT: RNDNE * T2.W, PV.Z,
-; CM-NEXT: 975668412(6.390323e-04), 0(0.000000e+00)
-; CM-NEXT: TRUNC T2.Z, PV.W,
+; CM-NEXT: MUL_IEEE T0.Z, PV.W, literal.x,
+; CM-NEXT: MUL_IEEE * T2.W, T0.W, literal.y,
+; CM-NEXT: 975668412(6.390323e-04), 1079283712(3.321289e+00)
+; CM-NEXT: RNDNE T1.Z, PV.W,
; CM-NEXT: MULADD_IEEE * T1.W, T1.W, literal.x, PV.Z,
; CM-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00)
-; CM-NEXT: MULADD_IEEE T0.Y, T0.W, literal.x, PV.W,
-; CM-NEXT: ADD T0.Z, T0.Z, -T2.W,
-; CM-NEXT: FLT_TO_INT * T0.W, PV.Z,
+; CM-NEXT: MULADD_IEEE T0.Z, T0.W, literal.x, PV.W,
+; CM-NEXT: ADD * T0.W, T2.W, -PV.Z, BS:VEC_120/SCL_212
; CM-NEXT: 975668412(6.390323e-04), 0(0.000000e+00)
-; CM-NEXT: MIN_INT T1.Z, PV.W, literal.x,
-; CM-NEXT: ADD * T1.W, PV.Z, PV.Y,
+; CM-NEXT: TRUNC T1.Z, T1.Z,
+; CM-NEXT: ADD * T0.W, PV.W, PV.Z,
+; CM-NEXT: EXP_IEEE T0.X, T0.W,
+; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
+; CM-NEXT: FLT_TO_INT T0.Z, T1.Z,
+; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.x,
+; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
+; CM-NEXT: MUL_IEEE T0.Y, PV.W, literal.x,
+; CM-NEXT: MAX_INT T1.Z, PV.Z, literal.y,
+; CM-NEXT: MIN_INT * T1.W, PV.Z, literal.z,
+; CM-NEXT: 209715200(1.972152e-31), -330(nan)
; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00)
-; CM-NEXT: EXP_IEEE T0.X, T1.W,
-; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W,
-; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W,
-; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W,
-; CM-NEXT: MUL_IEEE T0.Y, PV.X, literal.x,
-; CM-NEXT: ADD_INT T0.Z, T1.Z, literal.y,
-; CM-NEXT: MAX_INT * T1.W, T0.W, literal.z,
-; CM-NEXT: 2130706432(1.701412e+38), -254(nan)
-; CM-NEXT: -330(nan), 0(0.000000e+00)
-; CM-NEXT: ADD_INT T1.X, T0.W, literal.x,
-; CM-NEXT: ADD_INT T1.Y, PV.W, literal.y,
-; CM-NEXT: ADD_INT T1.Z, T0.W, literal.z,
-; CM-NEXT: SETGT_UINT * T1.W, T0.W, literal.w,
-; CM-NEXT: -127(nan), 204(2.858649e-43)
+; CM-NEXT: ADD_INT T1.X, PV.W, literal.x,
+; CM-NEXT: ADD_INT T1.Y, PV.Z, literal.y,
+; CM-NEXT: ADD_INT T1.Z, T0.Z, literal.z,
+; CM-NEXT: SETGT_UINT * T1.W, T0.Z, literal.w,
+; CM-NEXT: -254(nan), 204(2.858649e-43)
; CM-NEXT: 102(1.429324e-43), -229(nan)
-; CM-NEXT: SETGT_UINT T2.X, T0.W, literal.x,
-; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
-; CM-NEXT: SETGT_INT T1.Z, T0.W, literal.y,
-; CM-NEXT: MUL_IEEE * T2.W, T0.X, literal.z,
-; CM-NEXT: 254(3.559298e-43), -127(nan)
-; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
-; CM-NEXT: MUL_IEEE T3.X, PV.W, literal.x,
-; CM-NEXT: CNDE_INT T1.Y, PV.Z, PV.Y, T0.W,
-; CM-NEXT: CNDE_INT T0.Z, PV.X, T1.X, T0.Z,
-; CM-NEXT: SETGT_INT * T0.W, T0.W, literal.y,
-; CM-NEXT: 209715200(1.972152e-31), 127(1.779649e-43)
+; CM-NEXT: ADD_INT T2.X, T0.Z, literal.x,
+; CM-NEXT: SETGT_UINT T2.Y, T0.Z, literal.y,
+; CM-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
+; CM-NEXT: SETGT_INT * T2.W, T0.Z, literal.x,
+; CM-NEXT: -127(nan), 254(3.559298e-43)
+; CM-NEXT: MUL_IEEE T3.X, T0.X, literal.x,
+; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Z, T0.Z,
+; CM-NEXT: CNDE_INT T1.Z, PV.Y, PV.X, T1.X,
+; CM-NEXT: SETGT_INT * T3.W, T0.Z, literal.y,
+; CM-NEXT: 2130706432(1.701412e+38), 127(1.779649e-43)
; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
-; CM-NEXT: CNDE_INT T0.Z, T1.W, PV.X, T2.W,
-; CM-NEXT: MUL_IEEE * T1.W, T0.Y, literal.x,
+; CM-NEXT: MUL_IEEE T0.Z, PV.X, literal.x,
+; CM-NEXT: CNDE_INT * T0.W, T1.W, T0.Y, T0.W,
; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
-; CM-NEXT: CNDE_INT T1.X, T2.X, T0.Y, PV.W,
-; CM-NEXT: CNDE_INT T0.Y, T1.Z, PV.Z, T0.X,
+; CM-NEXT: CNDE_INT T0.X, T2.W, PV.W, T0.X,
+; CM-NEXT: CNDE_INT T0.Y, T2.Y, T3.X, PV.Z,
; CM-NEXT: LSHL T0.Z, PV.Y, literal.x,
-; CM-NEXT: AND_INT * T1.W, KC0[3].Z, literal.y,
+; CM-NEXT: AND_INT * T0.W, KC0[3].Z, literal.y,
; CM-NEXT: 23(3.222986e-44), -4096(nan)
-; CM-NEXT: MUL_IEEE T0.X, PV.W, literal.x,
; CM-NEXT: ADD T1.Y, KC0[3].Z, -PV.W,
-; CM-NEXT: ADD_INT T0.Z, PV.Z, literal.y,
-; CM-NEXT: CNDE_INT * T0.W, T0.W, PV.Y, PV.X,
-; CM-NEXT: 1079283712(3.321289e+00), 1065353216(1.000000e+00)
-; CM-NEXT: MUL_IEEE T0.Y, PV.W, PV.Z,
-; CM-NEXT: MUL_IEEE T0.Z, PV.Y, literal.x,
-; CM-NEXT: RNDNE * T0.W, PV.X,
-; CM-NEXT: 975668412(6.390323e-04), 0(0.000000e+00)
+; CM-NEXT: ADD_INT T0.Z, PV.Z, literal.x,
+; CM-NEXT: CNDE_INT * T1.W, T3.W, PV.X, PV.Y,
+; CM-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00)
+; CM-NEXT: MUL_IEEE T0.X, PV.W, PV.Z,
+; CM-NEXT: MUL_IEEE T0.Y, PV.Y, literal.x,
+; CM-NEXT: MUL_IEEE T0.Z, T0.W, literal.y,
+; CM-NEXT: AND_INT * T1.W, KC0[3].W, literal.z,
+; CM-NEXT: 975668412(6.390323e-04), 1079283712(3.321289e+00)
+; CM-NEXT: -4096(nan), 0(0.000000e+00)
; CM-NEXT: SETGT T1.X, literal.x, KC0[3].Y,
-; CM-NEXT: TRUNC T2.Y, PV.W,
-; CM-NEXT: AND_INT T1.Z, KC0[3].W, literal.y,
-; CM-NEXT: MULADD_IEEE * T2.W, T1.Y, literal.z, PV.Z,
-; CM-NEXT: -1036817932(-4.485347e+01), -4096(nan)
-; CM-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00)
-; CM-NEXT: MULADD_IEEE T2.X, T1.W, literal.x, PV.W,
-; CM-NEXT: MUL_IEEE T1.Y, PV.Z, literal.y,
-; CM-NEXT: FLT_TO_INT T0.Z, PV.Y,
-; CM-NEXT: ADD * T1.W, KC0[3].W, -PV.Z,
+; CM-NEXT: ADD T2.Y, KC0[3].W, -PV.W,
+; CM-NEXT: RNDNE T1.Z, PV.Z,
+; CM-NEXT: MULADD_IEEE * T2.W, T1.Y, literal.y, PV.Y,
+; CM-NEXT: -1036817932(-4.485347e+01), 1079283712(3.321289e+00)
+; CM-NEXT: MULADD_IEEE T2.X, T0.W, literal.x, PV.W,
+; CM-NEXT: ADD T0.Y, T0.Z, -PV.Z,
+; CM-NEXT: MUL_IEEE T0.Z, PV.Y, literal.x,
+; CM-NEXT: MUL_IEEE * T0.W, T1.W, literal.y, BS:VEC_120/SCL_212
; CM-NEXT: 975668412(6.390323e-04), 1079283712(3.321289e+00)
-; CM-NEXT: ADD T0.X, T0.X, -T0.W,
-; CM-NEXT: MUL_IEEE T2.Y, PV.W, literal.x,
-; CM-NEXT: MAX_INT T2.Z, PV.Z, literal.y,
-; CM-NEXT: RNDNE * T0.W, PV.Y,
-; CM-NEXT: 975668412(6.390323e-04), -330(nan)
-; CM-NEXT: TRUNC T3.X, PV.W,
-; CM-NEXT: ADD_INT T3.Y, PV.Z, literal.x,
-; CM-NEXT: MULADD_IEEE T2.Z, T1.W, literal.y, PV.Y,
-; CM-NEXT: ADD * T1.W, PV.X, T2.X,
-; CM-NEXT: 204(2.858649e-43), 1079283712(3.321289e+00)
-; CM-NEXT: EXP_IEEE T0.X, T1.W,
-; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W,
-; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W,
-; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W,
-; CM-NEXT: ADD_INT T2.X, T0.Z, literal.x,
-; CM-NEXT: MULADD_IEEE T2.Y, T1.Z, literal.y, T2.Z, BS:VEC_102/SCL_221
-; CM-NEXT: ADD T1.Z, T1.Y, -T0.W,
-; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.z,
-; CM-NEXT: 102(1.429324e-43), 975668412(6.390323e-04)
-; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
-; CM-NEXT: SETGT_UINT T4.X, T0.Z, literal.x,
-; CM-NEXT: MUL_IEEE T1.Y, PV.W, literal.y,
-; CM-NEXT: SETGT_UINT T2.Z, T0.Z, literal.z,
-; CM-NEXT: ADD * T1.W, PV.Z, PV.Y,
-; CM-NEXT: -229(nan), 2130706432(1.701412e+38)
-; CM-NEXT: 254(3.559298e-43), 0(0.000000e+00)
+; CM-NEXT: TRUNC T3.X, T1.Z,
+; CM-NEXT: RNDNE T1.Y, PV.W,
+; CM-NEXT: MULADD_IEEE T0.Z, T2.Y, literal.x, PV.Z,
+; CM-NEXT: ADD * T2.W, PV.Y, PV.X,
+; CM-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00)
+; CM-NEXT: EXP_IEEE T0.X (MASKED), T2.W,
+; CM-NEXT: EXP_IEEE T0.Y, T2.W,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T2.W,
+; CM-NEXT: EXP_IEEE * T0.W (MASKED), T2.W,
+; CM-NEXT: MULADD_IEEE T2.X, T1.W, literal.x, T0.Z,
+; CM-NEXT: ADD T2.Y, T0.W, -T1.Y, BS:VEC_120/SCL_212
+; CM-NEXT: FLT_TO_INT T0.Z, T3.X,
+; CM-NEXT: MUL_IEEE * T0.W, PV.Y, literal.y,
+; CM-NEXT: 975668412(6.390323e-04), 209715200(1.972152e-31)
+; CM-NEXT: MUL_IEEE T3.X, PV.W, literal.x,
+; CM-NEXT: SETGT_UINT T3.Y, PV.Z, literal.y,
+; CM-NEXT: TRUNC T1.Z, T1.Y,
+; CM-NEXT: ADD * T1.W, PV.Y, PV.X,
+; CM-NEXT: 209715200(1.972152e-31), -229(nan)
; CM-NEXT: EXP_IEEE T1.X (MASKED), T1.W,
-; CM-NEXT: EXP_IEEE T1.Y (MASKED), T1.W,
-; CM-NEXT: EXP_IEEE T1.Z, T1.W,
+; CM-NEXT: EXP_IEEE T1.Y, T1.W,
+; CM-NEXT: EXP_IEEE T1.Z (MASKED), T1.W,
; CM-NEXT: EXP_IEEE * T1.W (MASKED), T1.W,
-; CM-NEXT: ALU clause starting at 109:
-; CM-NEXT: CNDE_INT T5.X, T2.Z, T0.W, T1.Y,
-; CM-NEXT: CNDE_INT T1.Y, T4.X, T3.Y, T2.X,
-; CM-NEXT: FLT_TO_INT T3.Z, T3.X, BS:VEC_120/SCL_212
-; CM-NEXT: MUL_IEEE * T0.W, T1.Z, literal.x, BS:VEC_120/SCL_212
-; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
-; CM-NEXT: SETGT_INT T2.X, T0.Z, literal.x,
-; CM-NEXT: MUL_IEEE T2.Y, T0.X, literal.y,
-; CM-NEXT: MUL_IEEE T4.Z, PV.W, literal.z,
-; CM-NEXT: SETGT_UINT * T1.W, PV.Z, literal.w,
-; CM-NEXT: -127(nan), 209715200(1.972152e-31)
-; CM-NEXT: 2130706432(1.701412e+38), 254(3.559298e-43)
-; CM-NEXT: CNDE_INT T3.X, PV.W, T0.W, PV.Z,
-; CM-NEXT: MUL_IEEE T3.Y, PV.Y, literal.x,
-; CM-NEXT: CNDE_INT T4.Z, PV.X, T1.Y, T0.Z,
-; CM-NEXT: MAX_INT * T0.W, T3.Z, literal.y,
-; CM-NEXT: 209715200(1.972152e-31), -330(nan)
-; CM-NEXT: ADD_INT T6.X, PV.W, literal.x,
-; CM-NEXT: ADD_INT T1.Y, T3.Z, literal.y,
-; CM-NEXT: SETGT_UINT T5.Z, T3.Z, literal.z,
-; CM-NEXT: MUL_IEEE * T0.W, T1.Z, literal.w, BS:VEC_120/SCL_212
+; CM-NEXT: FLT_TO_INT T2.X, T1.Z,
+; CM-NEXT: MUL_IEEE T2.Y, PV.Y, literal.x,
+; CM-NEXT: CNDE_INT T1.Z, T3.Y, T3.X, T0.W,
+; CM-NEXT: SETGT_INT * T0.W, T0.Z, literal.y, BS:VEC_120/SCL_212
+; CM-NEXT: 209715200(1.972152e-31), -127(nan)
+; CM-NEXT: CNDE_INT T3.X, PV.W, PV.Z, T0.Y,
+; CM-NEXT: MUL_IEEE * T4.Y, PV.Y, literal.x,
+; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
+; CM-NEXT: ALU clause starting at 108:
+; CM-NEXT: SETGT_UINT T1.Z, T2.X, literal.x,
+; CM-NEXT: MAX_INT * T1.W, T0.Z, literal.y,
+; CM-NEXT: -229(nan), -330(nan)
+; CM-NEXT: ADD_INT T4.X, PV.W, literal.x,
+; CM-NEXT: ADD_INT T5.Y, T0.Z, literal.y,
+; CM-NEXT: CNDE_INT T2.Z, PV.Z, T4.Y, T2.Y,
+; CM-NEXT: SETGT_INT * T1.W, T2.X, literal.z,
; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43)
-; CM-NEXT: -229(nan), 209715200(1.972152e-31)
-; CM-NEXT: MUL_IEEE T7.X, PV.W, literal.x,
-; CM-NEXT: MIN_INT T4.Y, T3.Z, literal.y,
-; CM-NEXT: CNDE_INT T6.Z, PV.Z, PV.X, PV.Y,
-; CM-NEXT: SETGT_INT * T2.W, T3.Z, literal.z,
-; CM-NEXT: 209715200(1.972152e-31), 381(5.338947e-43)
; CM-NEXT: -127(nan), 0(0.000000e+00)
-; CM-NEXT: CNDE_INT T6.X, PV.W, PV.Z, T3.Z,
-; CM-NEXT: MIN_INT T1.Y, T0.Z, literal.x,
-; CM-NEXT: ADD_INT T6.Z, PV.Y, literal.y,
-; CM-NEXT: ADD_INT * T3.W, T3.Z, literal.z, BS:VEC_120/SCL_212
-; CM-NEXT: 381(5.338947e-43), -254(nan)
-; CM-NEXT: -127(nan), 0(0.000000e+00)
-; CM-NEXT: CNDE_INT T8.X, T1.W, PV.W, PV.Z,
-; CM-NEXT: SETGT_INT T4.Y, T3.Z, literal.x,
-; CM-NEXT: ADD_INT T3.Z, PV.Y, literal.y,
-; CM-NEXT: ADD_INT * T1.W, T0.Z, literal.z, BS:VEC_120/SCL_212
+; CM-NEXT: CNDE_INT T5.X, PV.W, PV.Z, T1.Y,
+; CM-NEXT: MUL_IEEE T0.Y, T0.Y, literal.x,
+; CM-NEXT: MAX_INT T2.Z, T2.X, literal.y,
+; CM-NEXT: CNDE_INT * T2.W, T3.Y, PV.X, PV.Y, BS:VEC_120/SCL_212
+; CM-NEXT: 2130706432(1.701412e+38), -330(nan)
+; CM-NEXT: CNDE_INT T4.X, T0.W, PV.W, T0.Z,
+; CM-NEXT: ADD_INT T2.Y, PV.Z, literal.x,
+; CM-NEXT: ADD_INT T2.Z, T2.X, literal.y,
+; CM-NEXT: MIN_INT * T0.W, T2.X, literal.z,
+; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43)
+; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00)
+; CM-NEXT: ADD_INT T6.X, PV.W, literal.x,
+; CM-NEXT: ADD_INT T3.Y, T2.X, literal.y,
+; CM-NEXT: SETGT_UINT T3.Z, T2.X, literal.z,
+; CM-NEXT: CNDE_INT * T0.W, T1.Z, PV.Y, PV.Z,
+; CM-NEXT: -254(nan), -127(nan)
+; CM-NEXT: 254(3.559298e-43), 0(0.000000e+00)
+; CM-NEXT: MUL_IEEE T7.X, T1.Y, literal.x,
+; CM-NEXT: CNDE_INT T1.Y, T1.W, PV.W, T2.X,
+; CM-NEXT: CNDE_INT T1.Z, PV.Z, PV.Y, PV.X,
+; CM-NEXT: MIN_INT * T0.W, T0.Z, literal.y,
+; CM-NEXT: 2130706432(1.701412e+38), 381(5.338947e-43)
+; CM-NEXT: SETGT_INT T2.X, T2.X, literal.x,
+; CM-NEXT: ADD_INT T2.Y, PV.W, literal.y,
+; CM-NEXT: ADD_INT T2.Z, T0.Z, literal.z,
+; CM-NEXT: SETGT_UINT * T0.W, T0.Z, literal.w,
; CM-NEXT: 127(1.779649e-43), -254(nan)
-; CM-NEXT: -127(nan), 0(0.000000e+00)
-; CM-NEXT: CNDE_INT T9.X, T2.Z, PV.W, PV.Z,
-; CM-NEXT: SETGT_INT T1.Y, T0.Z, literal.x, BS:VEC_120/SCL_212
-; CM-NEXT: CNDE_INT T0.Z, PV.Y, T6.X, PV.X,
-; CM-NEXT: CNDE_INT * T0.W, T5.Z, T7.X, T0.W, BS:VEC_201
-; CM-NEXT: 127(1.779649e-43), 0(0.000000e+00)
-; CM-NEXT: CNDE_INT T6.X, T2.W, PV.W, T1.Z,
-; CM-NEXT: LSHL T5.Y, PV.Z, literal.x,
-; CM-NEXT: CNDE_INT T0.Z, PV.Y, T4.Z, PV.X,
-; CM-NEXT: CNDE_INT * T0.W, T4.X, T3.Y, T2.Y,
-; CM-NEXT: 23(3.222986e-44), 0(0.000000e+00)
-; CM-NEXT: CNDE_INT T0.X, T2.X, PV.W, T0.X,
-; CM-NEXT: LSHL T2.Y, PV.Z, literal.x,
+; CM-NEXT: -127(nan), 254(3.559298e-43)
+; CM-NEXT: CNDE_INT T6.X, PV.W, PV.Z, PV.Y,
+; CM-NEXT: SETGT_INT T2.Y, T0.Z, literal.x,
+; CM-NEXT: CNDE_INT T0.Z, PV.X, T1.Y, T1.Z,
+; CM-NEXT: MUL_IEEE * T1.W, T7.X, literal.y,
+; CM-NEXT: 127(1.779649e-43), 2130706432(1.701412e+38)
+; CM-NEXT: CNDE_INT T7.X, T3.Z, T7.X, PV.W,
+; CM-NEXT: LSHL T1.Y, PV.Z, literal.x,
+; CM-NEXT: CNDE_INT T0.Z, PV.Y, T4.X, PV.X, BS:VEC_021/SCL_122
+; CM-NEXT: MUL_IEEE * T1.W, T0.Y, literal.y,
+; CM-NEXT: 23(3.222986e-44), 2130706432(1.701412e+38)
+; CM-NEXT: CNDE_INT T4.X, T0.W, T0.Y, PV.W,
+; CM-NEXT: LSHL T0.Y, PV.Z, literal.x,
; CM-NEXT: ADD_INT T0.Z, PV.Y, literal.y,
-; CM-NEXT: CNDE_INT * T0.W, T4.Y, PV.X, T3.X, BS:VEC_021/SCL_122
+; CM-NEXT: CNDE_INT * T0.W, T2.X, T5.X, PV.X,
; CM-NEXT: 23(3.222986e-44), 1065353216(1.000000e+00)
; CM-NEXT: MUL_IEEE T2.X, PV.W, PV.Z,
-; CM-NEXT: SETGT T3.Y, literal.x, KC0[3].W,
+; CM-NEXT: SETGT T1.Y, literal.x, KC0[3].W,
; CM-NEXT: ADD_INT T0.Z, PV.Y, literal.y,
-; CM-NEXT: CNDE_INT * T0.W, T1.Y, PV.X, T5.X,
+; CM-NEXT: CNDE_INT * T0.W, T2.Y, T3.X, PV.X,
; CM-NEXT: -1036817932(-4.485347e+01), 1065353216(1.000000e+00)
-; CM-NEXT: MUL_IEEE T0.X, PV.W, PV.Z,
-; CM-NEXT: SETGT T1.Y, literal.x, KC0[3].Z,
+; CM-NEXT: MUL_IEEE T3.X, PV.W, PV.Z,
+; CM-NEXT: SETGT T0.Y, literal.x, KC0[3].Z,
; CM-NEXT: CNDE T0.Z, PV.Y, PV.X, 0.0,
; CM-NEXT: SETGT * T0.W, KC0[3].W, literal.y,
; CM-NEXT: -1036817932(-4.485347e+01), 1109008539(3.853184e+01)
; CM-NEXT: CNDE T2.X, PV.W, PV.Z, literal.x,
-; CM-NEXT: CNDE T1.Y, PV.Y, PV.X, 0.0,
+; CM-NEXT: CNDE T0.Y, PV.Y, PV.X, 0.0,
; CM-NEXT: SETGT T0.Z, KC0[3].Z, literal.y,
; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
; CM-NEXT: 2139095040(INF), 1109008539(3.853184e+01)
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: LSHR T0.X, PV.W, literal.x,
-; CM-NEXT: CNDE T1.Y, PV.Z, PV.Y, literal.y,
-; CM-NEXT: CNDE T0.Z, T1.X, T0.Y, 0.0,
+; CM-NEXT: LSHR T3.X, PV.W, literal.x,
+; CM-NEXT: CNDE T0.Y, PV.Z, PV.Y, literal.y,
+; CM-NEXT: CNDE T0.Z, T1.X, T0.X, 0.0,
; CM-NEXT: SETGT * T0.W, KC0[3].Y, literal.z,
; CM-NEXT: 2(2.802597e-45), 2139095040(INF)
; CM-NEXT: 1109008539(3.853184e+01), 0(0.000000e+00)
-; CM-NEXT: CNDE * T1.X, PV.W, PV.Z, literal.x,
+; CM-NEXT: CNDE * T0.X, PV.W, PV.Z, literal.x,
; CM-NEXT: 2139095040(INF), 0(0.000000e+00)
-; CM-NEXT: LSHR * T3.X, KC0[2].Y, literal.x,
+; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%result = call <3 x float> @llvm.exp10.v3f32(<3 x float> %in)
store <3 x float> %result, ptr addrspace(1) %out
@@ -2052,227 +2043,224 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
; R600-LABEL: s_exp10_v4f32:
; R600: ; %bb.0:
; R600-NEXT: ALU 98, @6, KC0[CB0:0-32], KC1[]
-; R600-NEXT: ALU 98, @105, KC0[CB0:0-32], KC1[]
-; R600-NEXT: ALU 24, @204, KC0[CB0:0-32], KC1[]
+; R600-NEXT: ALU 95, @105, KC0[CB0:0-32], KC1[]
+; R600-NEXT: ALU 24, @201, KC0[CB0:0-32], KC1[]
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1
; R600-NEXT: CF_END
; R600-NEXT: PAD
; R600-NEXT: ALU clause starting at 6:
; R600-NEXT: AND_INT * T0.W, KC0[3].Z, literal.x,
; R600-NEXT: -4096(nan), 0(0.000000e+00)
-; R600-NEXT: ADD T1.W, KC0[3].Z, -PV.W,
-; R600-NEXT: MUL_IEEE * T2.W, PV.W, literal.x,
+; R600-NEXT: ADD * T1.W, KC0[3].Z, -PV.W,
+; R600-NEXT: MUL_IEEE T2.W, PV.W, literal.x,
+; R600-NEXT: MUL_IEEE * T3.W, T0.W, literal.y,
+; R600-NEXT: 975668412(6.390323e-04), 1079283712(3.321289e+00)
+; R600-NEXT: RNDNE T4.W, PS,
+; R600-NEXT: MULADD_IEEE * T1.W, T1.W, literal.x, PV.W, BS:VEC_021/SCL_122
; R600-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00)
-; R600-NEXT: RNDNE T3.W, PS,
-; R600-NEXT: MUL_IEEE * T4.W, PV.W, literal.x,
+; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.x, PS,
+; R600-NEXT: ADD * T1.W, T3.W, -PV.W,
; R600-NEXT: 975668412(6.390323e-04), 0(0.000000e+00)
-; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PS,
-; R600-NEXT: TRUNC * T4.W, PV.W,
-; R600-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00)
-; R600-NEXT: FLT_TO_INT T0.Z, PS,
-; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.x, PV.W,
-; R600-NEXT: ADD * T1.W, T2.W, -T3.W,
-; R600-NEXT: 975668412(6.390323e-04), 0(0.000000e+00)
-; R600-NEXT: ADD T1.Z, PS, PV.W,
-; R600-NEXT: MAX_INT T0.W, PV.Z, literal.x,
-; R600-NEXT: MIN_INT * T1.W, PV.Z, literal.y,
-; R600-NEXT: -330(nan), 381(5.338947e-43)
-; R600-NEXT: ADD_INT T0.X, PS, literal.x,
-; R600-NEXT: ADD_INT T0.Y, PV.W, literal.y,
-; R600-NEXT: ADD_INT T2.Z, T0.Z, literal.z,
-; R600-NEXT: SETGT_UINT T0.W, T0.Z, literal.w,
-; R600-NEXT: EXP_IEEE * T1.X, PV.Z,
-; R600-NEXT: -254(nan), 204(2.858649e-43)
-; R600-NEXT: 102(1.429324e-43), -229(nan)
-; R600-NEXT: ADD_INT T2.X, T0.Z, literal.x,
-; R600-NEXT: SETGT_UINT T1.Y, T0.Z, literal.y,
-; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
-; R600-NEXT: SETGT_INT T1.W, T0.Z, literal.x,
-; R600-NEXT: MUL_IEEE * T2.W, PS, literal.z,
-; R600-NEXT: -127(nan), 254(3.559298e-43)
-; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T3.X, T1.X, literal.x,
-; R600-NEXT: MUL_IEEE T0.Y, PS, literal.y,
-; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Z, T0.Z,
-; R600-NEXT: CNDE_INT T3.W, PV.Y, PV.X, T0.X,
-; R600-NEXT: SETGT_INT * T4.W, T0.Z, literal.z,
-; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31)
-; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00)
-; R600-NEXT: AND_INT T2.Y, KC0[4].X, literal.x,
-; R600-NEXT: CNDE_INT T0.Z, PS, PV.Z, PV.W,
-; R600-NEXT: CNDE_INT T0.W, T0.W, PV.Y, T2.W,
-; R600-NEXT: MUL_IEEE * T2.W, PV.X, literal.y,
-; R600-NEXT: -4096(nan), 2130706432(1.701412e+38)
-; R600-NEXT: CNDE_INT T0.X, T1.Y, T3.X, PS,
-; R600-NEXT: CNDE_INT T0.Y, T1.W, PV.W, T1.X,
-; R600-NEXT: LSHL T0.Z, PV.Z, literal.x,
-; R600-NEXT: ADD T0.W, KC0[4].X, -PV.Y,
-; R600-NEXT: MUL_IEEE * T1.W, PV.Y, literal.y,
-; R600-NEXT: 23(3.222986e-44), 1079283712(3.321289e+00)
-; R600-NEXT: RNDNE T1.Y, PS,
-; R600-NEXT: MUL_IEEE T1.Z, PV.W, literal.x,
-; R600-NEXT: ADD_INT T2.W, PV.Z, literal.y,
-; R600-NEXT: CNDE_INT * T3.W, T4.W, PV.Y, PV.X,
-; R600-NEXT: 975668412(6.390323e-04), 1065353216(1.000000e+00)
-; R600-NEXT: MUL_IEEE T0.Y, PS, PV.W,
-; R600-NEXT: AND_INT T0.Z, KC0[3].W, literal.x,
-; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.y, PV.Z,
-; R600-NEXT: TRUNC * T2.W, PV.Y,
-; R600-NEXT: -4096(nan), 1079283712(3.321289e+00)
-; R600-NEXT: SETGT T0.X, literal.x, KC0[3].Z,
-; R600-NEXT: FLT_TO_INT T3.Y, PS,
-; R600-NEXT: MULADD_IEEE T1.Z, T2.Y, literal.y, PV.W,
-; R600-NEXT: ADD T0.W, T1.W, -T1.Y,
-; R600-NEXT: MUL_IEEE * T1.W, PV.Z, literal.z,
-; R600-NEXT: -1036817932(-4.485347e+01), 975668412(6.390323e-04)
-; R600-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00)
-; R600-NEXT: RNDNE T1.X, PS,
-; R600-NEXT: AND_INT T1.Y, KC0[3].Y, literal.x,
-; R600-NEXT: ADD T1.Z, PV.W, PV.Z,
-; R600-NEXT: MAX_INT T0.W, PV.Y, literal.y,
-; R600-NEXT: MIN_INT * T2.W, PV.Y, literal.z,
-; R600-NEXT: -4096(nan), -330(nan)
+; R600-NEXT: ADD T0.W, PS, PV.W,
+; R600-NEXT: TRUNC * T1.W, T4.W,
+; R600-NEXT: FLT_TO_INT T1.W, PS,
+; R600-NEXT: EXP_IEEE * T0.X, PV.W,
+; R600-NEXT: MUL_IEEE T0.Z, PS, literal.x,
+; R600-NEXT: MAX_INT T0.W, PV.W, literal.y,
+; R600-NEXT: MIN_INT * T2.W, PV.W, literal.z,
+; R600-NEXT: 209715200(1.972152e-31), -330(nan)
; R600-NEXT: 381(5.338947e-43), 0(0.000000e+00)
-; R600-NEXT: ADD_INT T2.X, PS, literal.x,
-; R600-NEXT: ADD_INT T2.Y, PV.W, literal.y,
-; R600-NEXT: ADD_INT T2.Z, T3.Y, literal.z,
-; R600-NEXT: SETGT_UINT T0.W, T3.Y, literal.w,
-; R600-NEXT: EXP_IEEE * T1.Z, PV.Z,
-; R600-NEXT: -254(nan), 204(2.858649e-43)
-; R600-NEXT: 102(1.429324e-43), -229(nan)
-; R600-NEXT: ADD_INT T3.X, T3.Y, literal.x,
-; R600-NEXT: SETGT_UINT T4.Y, T3.Y, literal.y,
-; R600-NEXT: CNDE_INT T2.Z, PV.W, PV.Y, PV.Z,
-; R600-NEXT: SETGT_INT T2.W, T3.Y, literal.x,
-; R600-NEXT: MUL_IEEE * T3.W, PS, literal.z,
+; R600-NEXT: ADD_INT T1.X, PS, literal.x,
+; R600-NEXT: AND_INT T0.Y, KC0[4].X, literal.y,
+; R600-NEXT: ADD_INT T1.Z, PV.W, literal.z,
+; R600-NEXT: ADD_INT * T0.W, T1.W, literal.w,
+; R600-NEXT: -254(nan), -4096(nan)
+; R600-NEXT: 204(2.858649e-43), 102(1.429324e-43)
+; R600-NEXT: SETGT_UINT * T2.W, T1.W, literal.x,
+; R600-NEXT: -229(nan), 0(0.000000e+00)
+; R600-NEXT: ADD_INT T2.X, T1.W, literal.x,
+; R600-NEXT: SETGT_UINT T1.Y, T1.W, literal.y,
+; R600-NEXT: CNDE_INT T1.Z, PV.W, T1.Z, T0.W,
+; R600-NEXT: SETGT_INT T0.W, T1.W, literal.x,
+; R600-NEXT: ADD * T3.W, KC0[4].X, -T0.Y,
; R600-NEXT: -127(nan), 254(3.559298e-43)
-; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T4.X, T1.Z, literal.x,
-; R600-NEXT: MUL_IEEE T2.Y, PS, literal.y,
-; R600-NEXT: CNDE_INT T2.Z, PV.W, PV.Z, T3.Y,
-; R600-NEXT: CNDE_INT T4.W, PV.Y, PV.X, T2.X,
-; R600-NEXT: SETGT_INT * T5.W, T3.Y, literal.z,
-; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31)
+; R600-NEXT: MUL_IEEE T3.X, PS, literal.x,
+; R600-NEXT: MUL_IEEE T2.Y, T0.Y, literal.y,
+; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Z, T1.W,
+; R600-NEXT: CNDE_INT T4.W, PV.Y, PV.X, T1.X,
+; R600-NEXT: SETGT_INT * T1.W, T1.W, literal.z,
+; R600-NEXT: 975668412(6.390323e-04), 1079283712(3.321289e+00)
; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00)
-; R600-NEXT: ADD T2.X, KC0[3].W, -T0.Z,
-; R600-NEXT: CNDE_INT T3.Y, PS, PV.Z, PV.W,
-; R600-NEXT: CNDE_INT * T2.Z, T0.W, PV.Y, T3.W,
-; R600-NEXT: ALU clause starting at 105:
-; R600-NEXT: MUL_IEEE T0.W, T4.X, literal.x,
-; R600-NEXT: ADD * T3.W, KC0[3].Y, -T1.Y,
+; R600-NEXT: CNDE_INT T1.X, PS, PV.Z, PV.W,
+; R600-NEXT: RNDNE T3.Y, PV.Y,
+; R600-NEXT: MULADD_IEEE T1.Z, T3.W, literal.x, PV.X,
+; R600-NEXT: MUL_IEEE T3.W, T0.Z, literal.y,
+; R600-NEXT: MUL_IEEE * T4.W, T0.X, literal.z,
+; R600-NEXT: 1079283712(3.321289e+00), 209715200(1.972152e-31)
; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T2.X, PS, literal.x,
+; R600-NEXT: CNDE_INT T4.Y, T2.W, PV.W, T0.Z,
+; R600-NEXT: MULADD_IEEE T0.Z, T0.Y, literal.y, PV.Z,
+; R600-NEXT: ADD T2.W, T2.Y, -PV.Y, BS:VEC_120/SCL_212
+; R600-NEXT: AND_INT * T3.W, KC0[3].Y, literal.z,
+; R600-NEXT: 2130706432(1.701412e+38), 975668412(6.390323e-04)
+; R600-NEXT: -4096(nan), 0(0.000000e+00)
; R600-NEXT: MUL_IEEE T3.X, PS, literal.x,
-; R600-NEXT: MUL_IEEE T2.Y, T1.Y, literal.y,
-; R600-NEXT: CNDE_INT T3.Z, T4.Y, T4.X, PV.W, BS:VEC_120/SCL_212
-; R600-NEXT: CNDE_INT T0.W, T2.W, T2.Z, T1.Z,
-; R600-NEXT: LSHL * T2.W, T3.Y, literal.z,
-; R600-NEXT: 975668412(6.390323e-04), 1079283712(3.321289e+00)
-; R600-NEXT: 23(3.222986e-44), 0(0.000000e+00)
-; R600-NEXT: ADD_INT T4.X, PS, literal.x,
-; R600-NEXT: CNDE_INT T3.Y, T5.W, PV.W, PV.Z,
-; R600-NEXT: RNDNE T1.Z, PV.Y,
-; R600-NEXT: MULADD_IEEE T0.W, T3.W, literal.y, PV.X, BS:VEC_120/SCL_212
-; R600-NEXT: MUL_IEEE * T2.W, T2.X, literal.z,
+; R600-NEXT: ADD T0.Y, PV.W, PV.Z,
+; R600-NEXT: CNDE_INT T0.Z, T0.W, PV.Y, T0.X, BS:VEC_021/SCL_122
+; R600-NEXT: CNDE_INT T0.W, T1.Y, T4.W, PV.X,
+; R600-NEXT: LSHL * T2.W, T1.X, literal.y,
+; R600-NEXT: 1079283712(3.321289e+00), 23(3.222986e-44)
+; R600-NEXT: AND_INT T0.X, KC0[3].W, literal.x,
+; R600-NEXT: TRUNC T1.Y, T3.Y,
+; R600-NEXT: ADD_INT T1.Z, PS, literal.y,
+; R600-NEXT: CNDE_INT T0.W, T1.W, PV.Z, PV.W,
+; R600-NEXT: EXP_IEEE * T0.Y, PV.Y,
+; R600-NEXT: -4096(nan), 1065353216(1.000000e+00)
+; R600-NEXT: MUL_IEEE T1.X, PV.W, PV.Z,
+; R600-NEXT: FLT_TO_INT T1.Y, PV.Y,
+; R600-NEXT: MUL_IEEE T0.Z, PS, literal.x,
+; R600-NEXT: ADD T0.W, KC0[3].W, -PV.X,
+; R600-NEXT: RNDNE * T1.W, T3.X,
+; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
+; R600-NEXT: SETGT T2.X, literal.x, KC0[3].Z,
+; R600-NEXT: TRUNC T2.Y, PS,
+; R600-NEXT: MUL_IEEE T1.Z, PV.W, literal.y,
+; R600-NEXT: MUL_IEEE T2.W, PV.Z, literal.z,
+; R600-NEXT: MAX_INT * T4.W, PV.Y, literal.w,
+; R600-NEXT: -1036817932(-4.485347e+01), 975668412(6.390323e-04)
+; R600-NEXT: 209715200(1.972152e-31), -330(nan)
+; R600-NEXT: ADD T4.X, KC0[3].Y, -T3.W,
+; R600-NEXT: ADD_INT T3.Y, PS, literal.x,
+; R600-NEXT: ADD_INT T2.Z, T1.Y, literal.y,
+; R600-NEXT: SETGT_UINT T4.W, T1.Y, literal.z,
+; R600-NEXT: MIN_INT * T5.W, T1.Y, literal.w,
+; R600-NEXT: 204(2.858649e-43), 102(1.429324e-43)
+; R600-NEXT: -229(nan), 381(5.338947e-43)
+; R600-NEXT: ADD_INT T5.X, PS, literal.x,
+; R600-NEXT: ADD_INT T4.Y, T1.Y, literal.y,
+; R600-NEXT: SETGT_UINT T3.Z, T1.Y, literal.z,
+; R600-NEXT: CNDE_INT T5.W, PV.W, PV.Y, PV.Z,
+; R600-NEXT: SETGT_INT * T6.W, T1.Y, literal.y,
+; R600-NEXT: -254(nan), -127(nan)
+; R600-NEXT: 254(3.559298e-43), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T6.X, T0.Y, literal.x,
+; R600-NEXT: CNDE_INT T3.Y, PS, PV.W, T1.Y,
+; R600-NEXT: CNDE_INT * T2.Z, PV.Z, PV.Y, PV.X,
+; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
+; R600-NEXT: ALU clause starting at 105:
+; R600-NEXT: SETGT_INT T5.W, T1.Y, literal.x,
+; R600-NEXT: MUL_IEEE * T7.W, T4.X, literal.y,
+; R600-NEXT: 127(1.779649e-43), 975668412(6.390323e-04)
+; R600-NEXT: MUL_IEEE T5.X, T0.X, literal.x,
+; R600-NEXT: MULADD_IEEE T1.Y, T4.X, literal.x, PS, BS:VEC_120/SCL_212
+; R600-NEXT: CNDE_INT T2.Z, PV.W, T3.Y, T2.Z,
+; R600-NEXT: MUL_IEEE T7.W, T6.X, literal.y, BS:VEC_201
+; R600-NEXT: CNDE_INT * T2.W, T4.W, T2.W, T0.Z,
+; R600-NEXT: 1079283712(3.321289e+00), 2130706432(1.701412e+38)
+; R600-NEXT: CNDE_INT T4.X, T6.W, PS, T0.Y,
+; R600-NEXT: CNDE_INT T0.Y, T3.Z, T6.X, PV.W,
+; R600-NEXT: LSHL T0.Z, PV.Z, literal.x,
+; R600-NEXT: MULADD_IEEE T2.W, T3.W, literal.y, PV.Y, BS:VEC_201
+; R600-NEXT: ADD * T1.W, T3.X, -T1.W,
+; R600-NEXT: 23(3.222986e-44), 975668412(6.390323e-04)
+; R600-NEXT: ADD T3.X, PS, PV.W,
+; R600-NEXT: ADD_INT T1.Y, PV.Z, literal.x,
+; R600-NEXT: CNDE_INT T0.Z, T5.W, PV.X, PV.Y,
+; R600-NEXT: RNDNE T1.W, T5.X,
+; R600-NEXT: MULADD_IEEE * T0.W, T0.W, literal.y, T1.Z, BS:VEC_021/SCL_122
; R600-NEXT: 1065353216(1.000000e+00), 1079283712(3.321289e+00)
-; R600-NEXT: 975668412(6.390323e-04), 0(0.000000e+00)
-; R600-NEXT: MULADD_IEEE T2.X, T2.X, literal.x, PS,
-; R600-NEXT: MULADD_IEEE T1.Y, T1.Y, literal.y, PV.W,
-; R600-NEXT: ADD T2.Z, T2.Y, -PV.Z, BS:VEC_120/SCL_212
-; R600-NEXT: MUL_IEEE T0.W, PV.Y, PV.X,
-; R600-NEXT: SETGT * T2.W, literal.z, KC0[4].X,
-; R600-NEXT: 1079283712(3.321289e+00), 975668412(6.390323e-04)
-; R600-NEXT: -1036817932(-4.485347e+01), 0(0.000000e+00)
-; R600-NEXT: CNDE T3.X, PS, PV.W, 0.0,
-; R600-NEXT: ADD T1.Y, PV.Z, PV.Y,
-; R600-NEXT: TRUNC T1.Z, T1.Z,
-; R600-NEXT: MULADD_IEEE T0.W, T0.Z, literal.x, PV.X, BS:VEC_120/SCL_212
-; R600-NEXT: ADD * T1.W, T1.W, -T1.X,
-; R600-NEXT: 975668412(6.390323e-04), 0(0.000000e+00)
-; R600-NEXT: SETGT T2.X, KC0[4].X, literal.x,
-; R600-NEXT: ADD T2.Y, PS, PV.W,
-; R600-NEXT: FLT_TO_INT T0.Z, PV.Z,
-; R600-NEXT: TRUNC T0.W, T1.X,
-; R600-NEXT: EXP_IEEE * T1.X, PV.Y,
-; R600-NEXT: 1109008539(3.853184e+01), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T4.X, PS, literal.x,
-; R600-NEXT: FLT_TO_INT T1.Y, PV.W,
-; R600-NEXT: MAX_INT T1.Z, PV.Z, literal.y,
-; R600-NEXT: MUL_IEEE T0.W, PS, literal.z,
-; R600-NEXT: EXP_IEEE * T1.W, PV.Y,
-; R600-NEXT: 2130706432(1.701412e+38), -330(nan)
+; R600-NEXT: MULADD_IEEE T0.X, T0.X, literal.x, PS,
+; R600-NEXT: ADD T0.Y, T5.X, -PV.W, BS:VEC_120/SCL_212
+; R600-NEXT: MUL_IEEE T0.Z, PV.Z, PV.Y,
+; R600-NEXT: SETGT T0.W, literal.y, KC0[4].X,
+; R600-NEXT: EXP_IEEE * T1.Y, PV.X,
+; R600-NEXT: 975668412(6.390323e-04), -1036817932(-4.485347e+01)
+; R600-NEXT: CNDE T3.X, PV.W, PV.Z, 0.0,
+; R600-NEXT: ADD T0.Y, PV.Y, PV.X,
+; R600-NEXT: FLT_TO_INT T0.Z, T2.Y,
+; R600-NEXT: TRUNC T0.W, T1.W,
+; R600-NEXT: MUL_IEEE * T1.W, PS, literal.x,
; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T5.X, PV.W, literal.x,
-; R600-NEXT: MUL_IEEE T2.Y, PS, literal.x,
-; R600-NEXT: ADD_INT T1.Z, PV.Z, literal.y,
-; R600-NEXT: ADD_INT T2.W, T0.Z, literal.z,
-; R600-NEXT: MAX_INT * T3.W, PV.Y, literal.w,
-; R600-NEXT: 209715200(1.972152e-31), 204(2.858649e-43)
-; R600-NEXT: 102(1.429324e-43), -330(nan)
-; R600-NEXT: SETGT_UINT T6.X, T0.Z, literal.x,
-; R600-NEXT: ADD_INT T3.Y, PS, literal.y,
-; R600-NEXT: ADD_INT T2.Z, T1.Y, literal.z,
-; R600-NEXT: SETGT_UINT T3.W, T1.Y, literal.x,
-; R600-NEXT: MIN_INT * T4.W, T1.Y, literal.w,
+; R600-NEXT: SETGT T0.X, KC0[4].X, literal.x,
+; R600-NEXT: MUL_IEEE T2.Y, PS, literal.y,
+; R600-NEXT: FLT_TO_INT T1.Z, PV.W,
+; R600-NEXT: MAX_INT T0.W, PV.Z, literal.z,
+; R600-NEXT: EXP_IEEE * T0.Y, PV.Y,
+; R600-NEXT: 1109008539(3.853184e+01), 209715200(1.972152e-31)
+; R600-NEXT: -330(nan), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T4.X, T1.Y, literal.x,
+; R600-NEXT: MUL_IEEE T3.Y, PS, literal.y,
+; R600-NEXT: ADD_INT T2.Z, PV.W, literal.z,
+; R600-NEXT: ADD_INT * T0.W, T0.Z, literal.w,
+; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31)
+; R600-NEXT: 204(2.858649e-43), 102(1.429324e-43)
+; R600-NEXT: MAX_INT * T2.W, T1.Z, literal.x,
+; R600-NEXT: -330(nan), 0(0.000000e+00)
+; R600-NEXT: SETGT_UINT T5.X, T0.Z, literal.x,
+; R600-NEXT: ADD_INT T4.Y, PV.W, literal.y,
+; R600-NEXT: ADD_INT T3.Z, T1.Z, literal.z, BS:VEC_120/SCL_212
+; R600-NEXT: SETGT_UINT T2.W, T1.Z, literal.x, BS:VEC_120/SCL_212
+; R600-NEXT: MIN_INT * T3.W, T1.Z, literal.w,
; R600-NEXT: -229(nan), 204(2.858649e-43)
; R600-NEXT: 102(1.429324e-43), 381(5.338947e-43)
-; R600-NEXT: ADD_INT T7.X, PS, literal.x,
-; R600-NEXT: ADD_INT T4.Y, T1.Y, literal.y,
-; R600-NEXT: SETGT_UINT T3.Z, T1.Y, literal.z,
-; R600-NEXT: CNDE_INT T4.W, PV.W, PV.Y, PV.Z,
-; R600-NEXT: SETGT_INT * T5.W, T1.Y, literal.y,
+; R600-NEXT: ADD_INT T6.X, PS, literal.x,
+; R600-NEXT: ADD_INT T5.Y, T1.Z, literal.y,
+; R600-NEXT: SETGT_UINT T4.Z, T1.Z, literal.z,
+; R600-NEXT: CNDE_INT T3.W, PV.W, PV.Y, PV.Z,
+; R600-NEXT: SETGT_INT * T4.W, T1.Z, literal.y,
; R600-NEXT: -254(nan), -127(nan)
; R600-NEXT: 254(3.559298e-43), 0(0.000000e+00)
-; R600-NEXT: CNDE_INT T8.X, PS, PV.W, T1.Y,
-; R600-NEXT: CNDE_INT T3.Y, PV.Z, PV.Y, PV.X,
-; R600-NEXT: SETGT_INT T2.Z, T1.Y, literal.x,
-; R600-NEXT: CNDE_INT T2.W, T6.X, T1.Z, T2.W,
-; R600-NEXT: SETGT_INT * T4.W, T0.Z, literal.y,
+; R600-NEXT: CNDE_INT T7.X, PS, PV.W, T1.Z, BS:VEC_021/SCL_122
+; R600-NEXT: CNDE_INT T4.Y, PV.Z, PV.Y, PV.X,
+; R600-NEXT: SETGT_INT T1.Z, T1.Z, literal.x, BS:VEC_120/SCL_212
+; R600-NEXT: CNDE_INT T0.W, T5.X, T2.Z, T0.W, BS:VEC_102/SCL_221
+; R600-NEXT: SETGT_INT * T3.W, T0.Z, literal.y,
; R600-NEXT: 127(1.779649e-43), -127(nan)
-; R600-NEXT: CNDE_INT T7.X, PS, PV.W, T0.Z,
-; R600-NEXT: CNDE_INT T1.Y, PV.Z, PV.X, PV.Y,
-; R600-NEXT: MIN_INT T1.Z, T0.Z, literal.x,
-; R600-NEXT: MUL_IEEE T2.W, T1.W, literal.y,
-; R600-NEXT: MUL_IEEE * T6.W, T2.Y, literal.z,
-; R600-NEXT: 381(5.338947e-43), 2130706432(1.701412e+38)
-; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
-; R600-NEXT: CNDE_INT T8.X, T3.W, PS, T2.Y,
-; R600-NEXT: MUL_IEEE T2.Y, PV.W, literal.x,
-; R600-NEXT: ADD_INT T1.Z, PV.Z, literal.y,
-; R600-NEXT: ADD_INT T3.W, T0.Z, literal.z,
-; R600-NEXT: SETGT_UINT * T6.W, T0.Z, literal.w,
+; R600-NEXT: CNDE_INT T6.X, PS, PV.W, T0.Z,
+; R600-NEXT: CNDE_INT T4.Y, PV.Z, PV.X, PV.Y,
+; R600-NEXT: MIN_INT T2.Z, T0.Z, literal.x,
+; R600-NEXT: MUL_IEEE T0.W, T3.Y, literal.y,
+; R600-NEXT: MUL_IEEE * T5.W, T0.Y, literal.z,
+; R600-NEXT: 381(5.338947e-43), 209715200(1.972152e-31)
+; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
+; R600-NEXT: MUL_IEEE T7.X, PS, literal.x,
+; R600-NEXT: CNDE_INT T3.Y, T2.W, PV.W, T3.Y,
+; R600-NEXT: ADD_INT T2.Z, PV.Z, literal.y,
+; R600-NEXT: ADD_INT T0.W, T0.Z, literal.z,
+; R600-NEXT: SETGT_UINT * T2.W, T0.Z, literal.w,
; R600-NEXT: 2130706432(1.701412e+38), -254(nan)
; R600-NEXT: -127(nan), 254(3.559298e-43)
-; R600-NEXT: CNDE_INT T9.X, PS, PV.W, PV.Z,
-; R600-NEXT: SETGT_INT T3.Y, T0.Z, literal.x,
-; R600-NEXT: CNDE_INT T0.Z, T3.Z, T2.W, PV.Y, BS:VEC_120/SCL_212
-; R600-NEXT: CNDE_INT T1.W, T5.W, PV.X, T1.W, BS:VEC_021/SCL_122
-; R600-NEXT: LSHL * T2.W, T1.Y, literal.y,
+; R600-NEXT: CNDE_INT T8.X, PS, PV.W, PV.Z,
+; R600-NEXT: SETGT_INT T5.Y, T0.Z, literal.x,
+; R600-NEXT: CNDE_INT T0.Z, T4.W, PV.Y, T0.Y, BS:VEC_021/SCL_122
+; R600-NEXT: CNDE_INT T0.W, T4.Z, T5.W, PV.X, BS:VEC_120/SCL_212
+; R600-NEXT: LSHL * T4.W, T4.Y, literal.y,
; R600-NEXT: 127(1.779649e-43), 23(3.222986e-44)
-; R600-NEXT: ADD_INT T8.X, PS, literal.x,
-; R600-NEXT: CNDE_INT T1.Y, T2.Z, PV.W, PV.Z,
-; R600-NEXT: CNDE_INT T0.Z, PV.Y, T7.X, PV.X,
-; R600-NEXT: CNDE_INT * T0.W, T6.X, T5.X, T0.W, BS:VEC_021/SCL_122
-; R600-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE * T1.W, T4.X, literal.x,
-; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
-; R600-NEXT: CNDE_INT T4.X, T6.W, T4.X, PV.W,
-; R600-NEXT: CNDE_INT * T2.Y, T4.W, T0.W, T1.X, BS:VEC_120/SCL_212
-; R600-NEXT: ALU clause starting at 204:
+; R600-NEXT: ADD_INT T7.X, PS, literal.x,
+; R600-NEXT: CNDE_INT T0.Y, T1.Z, PV.Z, PV.W,
+; R600-NEXT: CNDE_INT T0.Z, PV.Y, T6.X, PV.X,
+; R600-NEXT: MUL_IEEE T0.W, T4.X, literal.y,
+; R600-NEXT: CNDE_INT * T1.W, T5.X, T2.Y, T1.W,
+; R600-NEXT: 1065353216(1.000000e+00), 2130706432(1.701412e+38)
+; R600-NEXT: CNDE_INT T5.X, T3.W, PS, T1.Y,
+; R600-NEXT: CNDE_INT * T1.Y, T2.W, T4.X, PV.W, BS:VEC_120/SCL_212
+; R600-NEXT: ALU clause starting at 201:
; R600-NEXT: LSHL T0.Z, T0.Z, literal.x,
-; R600-NEXT: MUL_IEEE T0.W, T1.Y, T8.X,
+; R600-NEXT: MUL_IEEE T0.W, T0.Y, T7.X,
; R600-NEXT: SETGT * T1.W, literal.y, KC0[3].W,
; R600-NEXT: 23(3.222986e-44), -1036817932(-4.485347e+01)
-; R600-NEXT: CNDE T1.X, PS, PV.W, 0.0,
-; R600-NEXT: SETGT T1.Y, KC0[3].W, literal.x,
+; R600-NEXT: CNDE T4.X, PS, PV.W, 0.0,
+; R600-NEXT: SETGT T0.Y, KC0[3].W, literal.x,
; R600-NEXT: ADD_INT T0.Z, PV.Z, literal.y,
-; R600-NEXT: CNDE_INT T0.W, T3.Y, T2.Y, T4.X, BS:VEC_120/SCL_212
-; R600-NEXT: CNDE * T1.W, T2.X, T3.X, literal.z,
+; R600-NEXT: CNDE_INT T0.W, T5.Y, T5.X, T1.Y, BS:VEC_102/SCL_221
+; R600-NEXT: CNDE * T1.W, T0.X, T3.X, literal.z,
; R600-NEXT: 1109008539(3.853184e+01), 1065353216(1.000000e+00)
; R600-NEXT: 2139095040(INF), 0(0.000000e+00)
-; R600-NEXT: MUL_IEEE T2.X, PV.W, PV.Z,
+; R600-NEXT: MUL_IEEE T0.X, PV.W, PV.Z,
; R600-NEXT: SETGT T2.Y, literal.x, KC0[3].Y,
; R600-NEXT: CNDE T1.Z, PV.Y, PV.X, literal.y,
-; R600-NEXT: CNDE T0.W, T0.X, T0.Y, 0.0,
+; R600-NEXT: CNDE T0.W, T2.X, T1.X, 0.0,
; R600-NEXT: SETGT * T2.W, KC0[3].Z, literal.z,
; R600-NEXT: -1036817932(-4.485347e+01), 2139095040(INF)
; R600-NEXT: 1109008539(3.853184e+01), 0(0.000000e+00)
@@ -2287,8 +2275,8 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
; CM-LABEL: s_exp10_v4f32:
; CM: ; %bb.0:
; CM-NEXT: ALU 97, @6, KC0[CB0:0-32], KC1[]
-; CM-NEXT: ALU 100, @104, KC0[CB0:0-32], KC1[]
-; CM-NEXT: ALU 36, @205, KC0[CB0:0-32], KC1[]
+; CM-NEXT: ALU 97, @104, KC0[CB0:0-32], KC1[]
+; CM-NEXT: ALU 35, @202, KC0[CB0:0-32], KC1[]
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
@@ -2307,224 +2295,220 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
; CM-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00)
; CM-NEXT: MULADD_IEEE T0.X, T0.W, literal.x, PV.W,
; CM-NEXT: ADD T0.Y, T0.Z, -PV.Z,
-; CM-NEXT: MUL_IEEE T0.Z, PV.Y, literal.x,
-; CM-NEXT: MUL_IEEE * T0.W, T2.W, literal.y, BS:VEC_120/SCL_212
+; CM-NEXT: MUL_IEEE T0.Z, T2.W, literal.y, BS:VEC_120/SCL_212
+; CM-NEXT: MUL_IEEE * T0.W, PV.Y, literal.x,
; CM-NEXT: 975668412(6.390323e-04), 1079283712(3.321289e+00)
; CM-NEXT: TRUNC T1.X, T1.Z,
-; CM-NEXT: RNDNE T2.Y, PV.W,
-; CM-NEXT: MULADD_IEEE T0.Z, T1.Y, literal.x, PV.Z,
-; CM-NEXT: ADD * T1.W, PV.Y, PV.X,
+; CM-NEXT: MULADD_IEEE T1.Y, T1.Y, literal.x, PV.W,
+; CM-NEXT: RNDNE T1.Z, PV.Z,
+; CM-NEXT: ADD * T0.W, PV.Y, PV.X,
; CM-NEXT: 1079283712(3.321289e+00), 0(0.000000e+00)
+; CM-NEXT: EXP_IEEE T0.X, T0.W,
+; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
+; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
+; CM-NEXT: TRUNC T2.X, T1.Z,
+; CM-NEXT: MULADD_IEEE T0.Y, T2.W, literal.x, T1.Y,
+; CM-NEXT: FLT_TO_INT T2.Z, T1.X,
+; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.y,
+; CM-NEXT: 975668412(6.390323e-04), 209715200(1.972152e-31)
+; CM-NEXT: ADD T1.X, T0.Z, -T1.Z,
+; CM-NEXT: MUL_IEEE T1.Y, PV.W, literal.x,
+; CM-NEXT: MAX_INT T0.Z, PV.Z, literal.y,
+; CM-NEXT: MIN_INT * T1.W, PV.Z, literal.z,
+; CM-NEXT: 209715200(1.972152e-31), -330(nan)
+; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00)
+; CM-NEXT: ADD_INT T3.X, PV.W, literal.x,
+; CM-NEXT: ADD_INT T2.Y, PV.Z, literal.y,
+; CM-NEXT: ADD_INT T0.Z, T2.Z, literal.z,
+; CM-NEXT: SETGT_UINT * T1.W, T2.Z, literal.w,
+; CM-NEXT: -254(nan), 204(2.858649e-43)
+; CM-NEXT: 102(1.429324e-43), -229(nan)
+; CM-NEXT: ADD_INT T4.X, T2.Z, literal.x,
+; CM-NEXT: SETGT_UINT T3.Y, T2.Z, literal.y,
+; CM-NEXT: CNDE_INT T0.Z, PV.W, PV.Y, PV.Z,
+; CM-NEXT: SETGT_INT * T2.W, T2.Z, literal.x,
+; CM-NEXT: -127(nan), 254(3.559298e-43)
+; CM-NEXT: MUL_IEEE T5.X, T0.X, literal.x,
+; CM-NEXT: CNDE_INT T2.Y, PV.W, PV.Z, T2.Z,
+; CM-NEXT: CNDE_INT T0.Z, PV.Y, PV.X, T3.X,
+; CM-NEXT: SETGT_INT * T3.W, T2.Z, literal.y,
+; CM-NEXT: 2130706432(1.701412e+38), 127(1.779649e-43)
+; CM-NEXT: AND_INT T3.X, KC0[3].Z, literal.x,
+; CM-NEXT: CNDE_INT T2.Y, PV.W, PV.Y, PV.Z,
+; CM-NEXT: MUL_IEEE T0.Z, PV.X, literal.y,
+; CM-NEXT: CNDE_INT * T0.W, T1.W, T1.Y, T0.W,
+; CM-NEXT: -4096(nan), 2130706432(1.701412e+38)
+; CM-NEXT: CNDE_INT T0.X, T2.W, PV.W, T0.X,
+; CM-NEXT: CNDE_INT T1.Y, T3.Y, T5.X, PV.Z,
+; CM-NEXT: LSHL T0.Z, PV.Y, literal.x,
+; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.y,
+; CM-NEXT: 23(3.222986e-44), 1079283712(3.321289e+00)
+; CM-NEXT: RNDNE T4.X, PV.W,
+; CM-NEXT: ADD_INT T2.Y, PV.Z, literal.x,
+; CM-NEXT: CNDE_INT T0.Z, T3.W, PV.X, PV.Y,
+; CM-NEXT: ADD * T1.W, T1.X, T0.Y,
+; CM-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00)
; CM-NEXT: EXP_IEEE T0.X, T1.W,
; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W,
; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W,
; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W,
-; CM-NEXT: MULADD_IEEE T2.X, T2.W, literal.x, T0.Z,
-; CM-NEXT: ADD T0.Y, T0.W, -T2.Y, BS:VEC_120/SCL_212
-; CM-NEXT: FLT_TO_INT T0.Z, T1.X,
-; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.y,
-; CM-NEXT: 975668412(6.390323e-04), 209715200(1.972152e-31)
-; CM-NEXT: MUL_IEEE T1.X, PV.W, literal.x,
+; CM-NEXT: MUL_IEEE T1.X, T0.Z, T2.Y,
+; CM-NEXT: TRUNC T0.Y, T4.X,
+; CM-NEXT: FLT_TO_INT T0.Z, T2.X, BS:VEC_120/SCL_212
+; CM-NEXT: MUL_IEEE * T1.W, PV.X, literal.x,
+; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
+; CM-NEXT: MUL_IEEE T2.X, PV.W, literal.x,
; CM-NEXT: MUL_IEEE T1.Y, T0.X, literal.y,
; CM-NEXT: MAX_INT T1.Z, PV.Z, literal.z,
-; CM-NEXT: MIN_INT * T1.W, PV.Z, literal.w,
+; CM-NEXT: MIN_INT * T2.W, PV.Z, literal.w,
; CM-NEXT: 209715200(1.972152e-31), 2130706432(1.701412e+38)
; CM-NEXT: -330(nan), 381(5.338947e-43)
-; CM-NEXT: ADD_INT T3.X, PV.W, literal.x,
-; CM-NEXT: ADD_INT T3.Y, PV.Z, literal.y,
+; CM-NEXT: ADD_INT T5.X, PV.W, literal.x,
+; CM-NEXT: ADD_INT T2.Y, PV.Z, literal.y,
; CM-NEXT: ADD_INT T1.Z, T0.Z, literal.z,
-; CM-NEXT: SETGT_UINT * T1.W, T0.Z, literal.w,
+; CM-NEXT: SETGT_UINT * T2.W, T0.Z, literal.w,
; CM-NEXT: -254(nan), 204(2.858649e-43)
; CM-NEXT: 102(1.429324e-43), -229(nan)
-; CM-NEXT: ADD_INT T4.X, T0.Z, literal.x,
-; CM-NEXT: SETGT_UINT T4.Y, T0.Z, literal.y,
+; CM-NEXT: ADD_INT T6.X, T0.Z, literal.x,
+; CM-NEXT: SETGT_UINT T3.Y, T0.Z, literal.y,
; CM-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
-; CM-NEXT: SETGT_INT * T2.W, T0.Z, literal.x,
+; CM-NEXT: SETGT_INT * T3.W, T0.Z, literal.x,
; CM-NEXT: -127(nan), 254(3.559298e-43)
-; CM-NEXT: CNDE_INT T5.X, PV.W, PV.Z, T0.Z,
-; CM-NEXT: CNDE_INT T3.Y, PV.Y, PV.X, T3.X,
-; CM-NEXT: SETGT_INT T0.Z, T0.Z, literal.x,
-; CM-NEXT: MUL_IEEE * T3.W, T1.Y, literal.y,
-; CM-NEXT: 127(1.779649e-43), 2130706432(1.701412e+38)
-; CM-NEXT: CNDE_INT T3.X, T4.Y, T1.Y, PV.W,
-; CM-NEXT: AND_INT T1.Y, KC0[3].Z, literal.x,
-; CM-NEXT: CNDE_INT T1.Z, PV.Z, PV.X, PV.Y,
-; CM-NEXT: CNDE_INT * T0.W, T1.W, T1.X, T0.W,
-; CM-NEXT: -4096(nan), 0(0.000000e+00)
-; CM-NEXT: CNDE_INT T0.X, T2.W, PV.W, T0.X,
-; CM-NEXT: LSHL T3.Y, PV.Z, literal.x,
-; CM-NEXT: TRUNC T1.Z, T2.Y,
-; CM-NEXT: ADD * T0.W, KC0[3].Z, -PV.Y,
-; CM-NEXT: 23(3.222986e-44), 0(0.000000e+00)
-; CM-NEXT: MUL_IEEE T1.X, PV.W, literal.x,
-; CM-NEXT: FLT_TO_INT T2.Y, PV.Z,
-; CM-NEXT: ADD_INT T1.Z, PV.Y, literal.y,
-; CM-NEXT: CNDE_INT * T1.W, T0.Z, PV.X, T3.X,
-; CM-NEXT: 975668412(6.390323e-04), 1065353216(1.000000e+00)
-; CM-NEXT: MUL_IEEE T0.X, PV.W, PV.Z,
-; CM-NEXT: MIN_INT T3.Y, PV.Y, literal.x,
-; CM-NEXT: MULADD_IEEE T0.Z, T0.W, literal.y, PV.X,
-; CM-NEXT: ADD * T0.W, T0.Y, T2.X,
-; CM-NEXT: 381(5.338947e-43), 1079283712(3.321289e+00)
-; CM-NEXT: EXP_IEEE T0.X (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE T0.Y, T0.W,
-; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
-; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
-; CM-NEXT: MULADD_IEEE T1.X, T1.Y, literal.x, T0.Z,
-; CM-NEXT: MUL_IEEE T4.Y, PV.Y, literal.y,
-; CM-NEXT: ADD_INT T0.Z, T3.Y, literal.z, BS:VEC_120/SCL_212
-; CM-NEXT: MAX_INT * T0.W, T2.Y, literal.w, BS:VEC_201
-; CM-NEXT: 975668412(6.390323e-04), 2130706432(1.701412e+38)
-; CM-NEXT: -254(nan), -330(nan)
-; CM-NEXT: ADD_INT T2.X, T2.Y, literal.x,
-; CM-NEXT: ADD_INT T3.Y, PV.W, literal.y,
-; CM-NEXT: ADD_INT T1.Z, T2.Y, literal.z,
-; CM-NEXT: SETGT_UINT * T0.W, T2.Y, literal.w,
-; CM-NEXT: -127(nan), 204(2.858649e-43)
-; CM-NEXT: 102(1.429324e-43), -229(nan)
-; CM-NEXT: SETGT_UINT T3.X, T2.Y, literal.x,
-; CM-NEXT: CNDE_INT T3.Y, PV.W, PV.Y, PV.Z,
-; CM-NEXT: SETGT_INT T1.Z, T2.Y, literal.y,
-; CM-NEXT: MUL_IEEE * T1.W, T0.Y, literal.z, BS:VEC_120/SCL_212
-; CM-NEXT: 254(3.559298e-43), -127(nan)
-; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
-; CM-NEXT: MUL_IEEE T4.X, PV.W, literal.x,
-; CM-NEXT: CNDE_INT * T3.Y, PV.Z, PV.Y, T2.Y,
-; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
-; CM-NEXT: ALU clause starting at 104:
-; CM-NEXT: CNDE_INT T0.Z, T3.X, T2.X, T0.Z,
-; CM-NEXT: SETGT_INT * T2.W, T2.Y, literal.x,
+; CM-NEXT: CNDE_INT T7.X, PV.W, PV.Z, T0.Z,
+; CM-NEXT: CNDE_INT T2.Y, PV.Y, PV.X, T5.X,
+; CM-NEXT: SETGT_INT * T0.Z, T0.Z, literal.x,
; CM-NEXT: 127(1.779649e-43), 0(0.000000e+00)
-; CM-NEXT: MUL_IEEE T2.X, T1.Y, literal.x,
-; CM-NEXT: CNDE_INT T1.Y, PV.W, T3.Y, PV.Z,
-; CM-NEXT: CNDE_INT T0.Z, T0.W, T4.X, T1.W,
-; CM-NEXT: MUL_IEEE * T0.W, T4.Y, literal.y, BS:VEC_201
-; CM-NEXT: 1079283712(3.321289e+00), 2130706432(1.701412e+38)
-; CM-NEXT: AND_INT T4.X, KC0[4].X, literal.x,
-; CM-NEXT: CNDE_INT T2.Y, T3.X, T4.Y, PV.W,
-; CM-NEXT: CNDE_INT T0.Z, T1.Z, PV.Z, T0.Y,
-; CM-NEXT: LSHL * T0.W, PV.Y, literal.y,
-; CM-NEXT: -4096(nan), 23(3.222986e-44)
-; CM-NEXT: ADD_INT T3.X, PV.W, literal.x,
-; CM-NEXT: CNDE_INT T0.Y, T2.W, PV.Z, PV.Y,
-; CM-NEXT: MUL_IEEE T0.Z, PV.X, literal.y,
-; CM-NEXT: RNDNE * T0.W, T2.X,
-; CM-NEXT: 1065353216(1.000000e+00), 1079283712(3.321289e+00)
-; CM-NEXT: ADD T2.X, T2.X, -PV.W,
-; CM-NEXT: RNDNE T1.Y, PV.Z,
-; CM-NEXT: MUL_IEEE T1.Z, PV.Y, PV.X,
-; CM-NEXT: SETGT * T1.W, literal.x, KC0[3].W,
-; CM-NEXT: -1036817932(-4.485347e+01), 0(0.000000e+00)
-; CM-NEXT: CNDE T3.X, PV.W, PV.Z, 0.0,
-; CM-NEXT: TRUNC T0.Y, T0.W,
-; CM-NEXT: TRUNC T1.Z, PV.Y,
-; CM-NEXT: ADD * T0.W, PV.X, T1.X,
+; CM-NEXT: ALU clause starting at 104:
+; CM-NEXT: ADD * T4.W, KC0[3].Z, -T3.X,
+; CM-NEXT: MUL_IEEE T5.X, PV.W, literal.x,
+; CM-NEXT: CNDE_INT T2.Y, T0.Z, T7.X, T2.Y,
+; CM-NEXT: MUL_IEEE T1.Z, T1.Y, literal.y,
+; CM-NEXT: CNDE_INT * T1.W, T2.W, T2.X, T1.W, BS:VEC_021/SCL_122
+; CM-NEXT: 975668412(6.390323e-04), 2130706432(1.701412e+38)
+; CM-NEXT: CNDE_INT T0.X, T3.W, PV.W, T0.X,
+; CM-NEXT: CNDE_INT T1.Y, T3.Y, T1.Y, PV.Z,
+; CM-NEXT: LSHL T1.Z, PV.Y, literal.x,
+; CM-NEXT: MULADD_IEEE * T1.W, T4.W, literal.y, PV.X, BS:VEC_120/SCL_212
+; CM-NEXT: 23(3.222986e-44), 1079283712(3.321289e+00)
+; CM-NEXT: MULADD_IEEE T2.X, T3.X, literal.x, PV.W,
+; CM-NEXT: ADD T2.Y, T0.W, -T4.X,
+; CM-NEXT: ADD_INT T1.Z, PV.Z, literal.y,
+; CM-NEXT: CNDE_INT * T0.W, T0.Z, PV.X, PV.Y,
+; CM-NEXT: 975668412(6.390323e-04), 1065353216(1.000000e+00)
+; CM-NEXT: AND_INT T0.X, KC0[4].X, literal.x,
+; CM-NEXT: MUL_IEEE T1.Y, PV.W, PV.Z,
+; CM-NEXT: SETGT T0.Z, literal.y, KC0[3].W,
+; CM-NEXT: ADD * T0.W, PV.Y, PV.X,
+; CM-NEXT: -4096(nan), -1036817932(-4.485347e+01)
; CM-NEXT: EXP_IEEE T0.X (MASKED), T0.W,
; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
; CM-NEXT: EXP_IEEE * T0.W, T0.W,
-; CM-NEXT: FLT_TO_INT T1.X, T1.Z,
-; CM-NEXT: FLT_TO_INT T0.Y, T0.Y,
-; CM-NEXT: MUL_IEEE T1.Z, PV.W, literal.x,
-; CM-NEXT: ADD * T1.W, KC0[4].X, -T4.X,
-; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
-; CM-NEXT: MUL_IEEE T2.X, PV.W, literal.x,
-; CM-NEXT: MUL_IEEE T2.Y, T0.W, literal.y,
-; CM-NEXT: MUL_IEEE T2.Z, PV.Z, literal.z,
-; CM-NEXT: SETGT_UINT * T2.W, PV.Y, literal.w,
-; CM-NEXT: 975668412(6.390323e-04), 209715200(1.972152e-31)
-; CM-NEXT: 2130706432(1.701412e+38), 254(3.559298e-43)
-; CM-NEXT: CNDE_INT T5.X, PV.W, T1.Z, PV.Z,
-; CM-NEXT: MUL_IEEE T3.Y, PV.Y, literal.x,
-; CM-NEXT: MULADD_IEEE T1.Z, T1.W, literal.y, PV.X,
-; CM-NEXT: MAX_INT * T1.W, T1.X, literal.z,
-; CM-NEXT: 209715200(1.972152e-31), 1079283712(3.321289e+00)
-; CM-NEXT: -330(nan), 0(0.000000e+00)
-; CM-NEXT: ADD_INT T2.X, PV.W, literal.x,
-; CM-NEXT: ADD_INT T4.Y, T1.X, literal.y,
-; CM-NEXT: MULADD_IEEE T1.Z, T4.X, literal.z, PV.Z, BS:VEC_120/SCL_212
-; CM-NEXT: MAX_INT * T1.W, T0.Y, literal.w,
-; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43)
+; CM-NEXT: CNDE T2.X, T0.Z, T1.Y, 0.0,
+; CM-NEXT: ADD T1.Y, KC0[4].X, -T0.X,
+; CM-NEXT: FLT_TO_INT T0.Z, T0.Y,
+; CM-NEXT: MUL_IEEE * T1.W, PV.W, literal.x,
+; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
+; CM-NEXT: MUL_IEEE T3.X, PV.W, literal.x,
+; CM-NEXT: SETGT_UINT T0.Y, PV.Z, literal.y,
+; CM-NEXT: MUL_IEEE T1.Z, PV.Y, literal.z,
+; CM-NEXT: MUL_IEEE * T2.W, T0.X, literal.w,
+; CM-NEXT: 209715200(1.972152e-31), -229(nan)
+; CM-NEXT: 975668412(6.390323e-04), 1079283712(3.321289e+00)
+; CM-NEXT: RNDNE T4.X, PV.W,
+; CM-NEXT: MULADD_IEEE T1.Y, T1.Y, literal.x, PV.Z,
+; CM-NEXT: CNDE_INT T1.Z, PV.Y, PV.X, T1.W,
+; CM-NEXT: SETGT_INT * T1.W, T0.Z, literal.y,
+; CM-NEXT: 1079283712(3.321289e+00), -127(nan)
+; CM-NEXT: CNDE_INT T3.X, PV.W, PV.Z, T0.W,
+; CM-NEXT: MULADD_IEEE T1.Y, T0.X, literal.x, PV.Y,
+; CM-NEXT: ADD T1.Z, T2.W, -PV.X,
+; CM-NEXT: MAX_INT * T2.W, T0.Z, literal.y,
; CM-NEXT: 975668412(6.390323e-04), -330(nan)
-; CM-NEXT: ADD T4.X, T0.Z, -T1.Y,
-; CM-NEXT: ADD_INT T1.Y, PV.W, literal.x,
-; CM-NEXT: ADD_INT T0.Z, T0.Y, literal.y,
-; CM-NEXT: SETGT_UINT * T1.W, T0.Y, literal.z,
+; CM-NEXT: ADD_INT T0.X, PV.W, literal.x,
+; CM-NEXT: ADD_INT T2.Y, T0.Z, literal.y,
+; CM-NEXT: TRUNC T2.Z, T4.X,
+; CM-NEXT: ADD * T2.W, PV.Z, PV.Y,
; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43)
-; CM-NEXT: -229(nan), 0(0.000000e+00)
-; CM-NEXT: SETGT_UINT T6.X, T1.X, literal.x,
-; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
-; CM-NEXT: SETGT_INT T0.Z, T0.Y, literal.y,
-; CM-NEXT: ADD * T3.W, PV.X, T1.Z,
-; CM-NEXT: -229(nan), -127(nan)
-; CM-NEXT: EXP_IEEE T1.X (MASKED), T3.W,
-; CM-NEXT: EXP_IEEE T1.Y (MASKED), T3.W,
-; CM-NEXT: EXP_IEEE T1.Z, T3.W,
-; CM-NEXT: EXP_IEEE * T1.W (MASKED), T3.W,
-; CM-NEXT: CNDE_INT T4.X, T0.Z, T1.Y, T0.Y,
-; CM-NEXT: CNDE_INT T1.Y, T6.X, T2.X, T4.Y, BS:VEC_120/SCL_212
-; CM-NEXT: SETGT_INT T2.Z, T1.X, literal.x,
-; CM-NEXT: MUL_IEEE * T3.W, PV.Z, literal.y,
-; CM-NEXT: -127(nan), 209715200(1.972152e-31)
-; CM-NEXT: MUL_IEEE T2.X, T1.Z, literal.x,
-; CM-NEXT: MUL_IEEE T4.Y, PV.W, literal.y,
-; CM-NEXT: CNDE_INT T3.Z, PV.Z, PV.Y, T1.X,
-; CM-NEXT: MIN_INT * T4.W, T1.X, literal.z,
+; CM-NEXT: EXP_IEEE T1.X (MASKED), T2.W,
+; CM-NEXT: EXP_IEEE T1.Y, T2.W,
+; CM-NEXT: EXP_IEEE T1.Z (MASKED), T2.W,
+; CM-NEXT: EXP_IEEE * T1.W (MASKED), T2.W,
+; CM-NEXT: MUL_IEEE T4.X, T0.W, literal.x,
+; CM-NEXT: FLT_TO_INT T3.Y, T2.Z,
+; CM-NEXT: MUL_IEEE T1.Z, PV.Y, literal.y,
+; CM-NEXT: CNDE_INT * T0.W, T0.Y, T0.X, T2.Y,
; CM-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31)
+; CM-NEXT: CNDE_INT T0.X, T1.W, PV.W, T0.Z,
+; CM-NEXT: MUL_IEEE T0.Y, PV.Z, literal.x,
+; CM-NEXT: MAX_INT T2.Z, PV.Y, literal.y,
+; CM-NEXT: MIN_INT * T0.W, PV.Y, literal.z,
+; CM-NEXT: 209715200(1.972152e-31), -330(nan)
; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00)
-; CM-NEXT: MIN_INT T7.X, T0.Y, literal.x,
-; CM-NEXT: ADD_INT T1.Y, PV.W, literal.y,
-; CM-NEXT: ADD_INT T4.Z, T1.X, literal.z,
-; CM-NEXT: SETGT_UINT * T4.W, T1.X, literal.w,
-; CM-NEXT: 381(5.338947e-43), -254(nan)
+; CM-NEXT: ADD_INT T5.X, PV.W, literal.x,
+; CM-NEXT: ADD_INT T2.Y, PV.Z, literal.y,
+; CM-NEXT: ADD_INT T2.Z, T3.Y, literal.z,
+; CM-NEXT: SETGT_UINT * T0.W, T3.Y, literal.w,
+; CM-NEXT: -254(nan), 204(2.858649e-43)
+; CM-NEXT: 102(1.429324e-43), -229(nan)
+; CM-NEXT: ADD_INT T6.X, T3.Y, literal.x,
+; CM-NEXT: SETGT_UINT T4.Y, T3.Y, literal.y,
+; CM-NEXT: CNDE_INT T2.Z, PV.W, PV.Y, PV.Z,
+; CM-NEXT: SETGT_INT * T1.W, T3.Y, literal.x,
; CM-NEXT: -127(nan), 254(3.559298e-43)
-; CM-NEXT: CNDE_INT T8.X, PV.W, PV.Z, PV.Y,
-; CM-NEXT: SETGT_INT T1.Y, T1.X, literal.x,
-; CM-NEXT: ADD_INT T4.Z, PV.X, literal.y,
-; CM-NEXT: ADD_INT * T5.W, T0.Y, literal.z,
+; CM-NEXT: MUL_IEEE T7.X, T1.Y, literal.x,
+; CM-NEXT: CNDE_INT T2.Y, PV.W, PV.Z, T3.Y,
+; CM-NEXT: CNDE_INT T2.Z, PV.Y, PV.X, T5.X,
+; CM-NEXT: MIN_INT * T2.W, T0.Z, literal.y,
+; CM-NEXT: 2130706432(1.701412e+38), 381(5.338947e-43)
+; CM-NEXT: SETGT_INT T5.X, T3.Y, literal.x,
+; CM-NEXT: ADD_INT T3.Y, PV.W, literal.y,
+; CM-NEXT: ADD_INT T3.Z, T0.Z, literal.z,
+; CM-NEXT: SETGT_UINT * T2.W, T0.Z, literal.w,
; CM-NEXT: 127(1.779649e-43), -254(nan)
-; CM-NEXT: -127(nan), 0(0.000000e+00)
-; CM-NEXT: CNDE_INT T1.X, T2.W, PV.W, PV.Z,
-; CM-NEXT: CNDE_INT T5.Y, PV.Y, T3.Z, PV.X,
-; CM-NEXT: CNDE_INT T3.Z, T6.X, T4.Y, T3.W,
-; CM-NEXT: MUL_IEEE * T2.W, T2.X, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT: -127(nan), 254(3.559298e-43)
+; CM-NEXT: CNDE_INT T6.X, PV.W, PV.Z, PV.Y,
+; CM-NEXT: CNDE_INT T2.Y, PV.X, T2.Y, T2.Z,
+; CM-NEXT: MUL_IEEE T2.Z, T7.X, literal.x,
+; CM-NEXT: CNDE_INT * T0.W, T0.W, T0.Y, T1.Z, BS:VEC_021/SCL_122
; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
-; CM-NEXT: SETGT_INT T6.X, T0.Y, literal.x,
-; CM-NEXT: CNDE_INT T0.Y, T4.W, T2.X, PV.W,
-; CM-NEXT: CNDE_INT * T1.Z, T2.Z, PV.Z, T1.Z,
-; CM-NEXT: 127(1.779649e-43), 0(0.000000e+00)
-; CM-NEXT: ALU clause starting at 205:
-; CM-NEXT: LSHL * T2.W, T5.Y, literal.x,
-; CM-NEXT: 23(3.222986e-44), 0(0.000000e+00)
-; CM-NEXT: ADD_INT T2.X, PV.W, literal.x,
-; CM-NEXT: CNDE_INT T0.Y, T1.Y, T1.Z, T0.Y,
-; CM-NEXT: CNDE_INT * T1.Z, T6.X, T4.X, T1.X,
+; CM-NEXT: SETGT_INT T8.X, T0.Z, literal.x,
+; CM-NEXT: CNDE_INT T0.Y, T1.W, PV.W, T1.Y,
+; CM-NEXT: CNDE_INT T0.Z, T4.Y, T7.X, PV.Z,
+; CM-NEXT: LSHL * T0.W, PV.Y, literal.y,
+; CM-NEXT: 127(1.779649e-43), 23(3.222986e-44)
+; CM-NEXT: ALU clause starting at 202:
+; CM-NEXT: ADD_INT T7.X, T0.W, literal.x,
+; CM-NEXT: CNDE_INT * T0.Y, T5.X, T0.Y, T0.Z,
; CM-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00)
-; CM-NEXT: CNDE_INT * T1.W, T1.W, T3.Y, T2.Y,
-; CM-NEXT: CNDE_INT T1.X, T0.Z, PV.W, T0.W,
-; CM-NEXT: LSHL T1.Y, T1.Z, literal.x, BS:VEC_120/SCL_212
-; CM-NEXT: MUL_IEEE T0.Z, T0.Y, T2.X,
+; CM-NEXT: CNDE_INT * T0.Z, T8.X, T0.X, T6.X,
+; CM-NEXT: MUL_IEEE * T0.W, T4.X, literal.x,
+; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
+; CM-NEXT: CNDE_INT T0.X, T2.W, T4.X, PV.W,
+; CM-NEXT: LSHL T1.Y, T0.Z, literal.x,
+; CM-NEXT: MUL_IEEE T0.Z, T0.Y, T7.X, BS:VEC_021/SCL_122
; CM-NEXT: SETGT * T0.W, literal.y, KC0[4].X,
; CM-NEXT: 23(3.222986e-44), -1036817932(-4.485347e+01)
-; CM-NEXT: CNDE T2.X, PV.W, PV.Z, 0.0,
+; CM-NEXT: CNDE T4.X, PV.W, PV.Z, 0.0,
; CM-NEXT: SETGT T0.Y, KC0[4].X, literal.x,
; CM-NEXT: ADD_INT T0.Z, PV.Y, literal.y,
-; CM-NEXT: CNDE_INT * T0.W, T6.X, PV.X, T5.X,
+; CM-NEXT: CNDE_INT * T0.W, T8.X, T3.X, PV.X,
; CM-NEXT: 1109008539(3.853184e+01), 1065353216(1.000000e+00)
-; CM-NEXT: SETGT T1.X, KC0[3].W, literal.x,
+; CM-NEXT: SETGT T0.X, KC0[3].W, literal.x,
; CM-NEXT: MUL_IEEE T1.Y, PV.W, PV.Z,
; CM-NEXT: SETGT T0.Z, literal.y, KC0[3].Z,
; CM-NEXT: CNDE * T0.W, PV.Y, PV.X, literal.z,
; CM-NEXT: 1109008539(3.853184e+01), -1036817932(-4.485347e+01)
; CM-NEXT: 2139095040(INF), 0(0.000000e+00)
-; CM-NEXT: SETGT T2.X, literal.x, KC0[3].Y,
+; CM-NEXT: SETGT T3.X, literal.x, KC0[3].Y,
; CM-NEXT: CNDE T0.Y, PV.Z, PV.Y, 0.0,
-; CM-NEXT: CNDE T0.Z, PV.X, T3.X, literal.y,
+; CM-NEXT: CNDE T0.Z, PV.X, T2.X, literal.y,
; CM-NEXT: SETGT * T1.W, KC0[3].Z, literal.z,
; CM-NEXT: -1036817932(-4.485347e+01), 2139095040(INF)
; CM-NEXT: 1109008539(3.853184e+01), 0(0.000000e+00)
; CM-NEXT: CNDE T0.Y, PV.W, PV.Y, literal.x,
-; CM-NEXT: CNDE T1.Z, PV.X, T0.X, 0.0,
+; CM-NEXT: CNDE T1.Z, PV.X, T1.X, 0.0,
; CM-NEXT: SETGT * T1.W, KC0[3].Y, literal.y,
; CM-NEXT: 2139095040(INF), 1109008539(3.853184e+01)
; CM-NEXT: CNDE * T0.X, PV.W, PV.Z, literal.x,
diff --git a/llvm/test/CodeGen/AMDGPU/shl.ll b/llvm/test/CodeGen/AMDGPU/shl.ll
index b1a82daa8e7db..b3f4790df4d48 100644
--- a/llvm/test/CodeGen/AMDGPU/shl.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl.ll
@@ -795,17 +795,17 @@ define amdgpu_kernel void @shl_i64(ptr addrspace(1) %out, ptr addrspace(1) %in)
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
-; EG-NEXT: AND_INT T1.Y, T0.Z, literal.x,
-; EG-NEXT: LSHR T1.Z, T0.Y, 1,
+; EG-NEXT: LSHR T1.Y, T0.Y, 1,
+; EG-NEXT: NOT_INT T1.Z, T0.Z,
; EG-NEXT: BIT_ALIGN_INT T0.W, T0.Y, T0.X, 1,
-; EG-NEXT: NOT_INT * T1.W, T0.Z,
+; EG-NEXT: AND_INT * T1.W, T0.Z, literal.x,
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT: BIT_ALIGN_INT T1.Z, PV.Z, PV.W, PS,
-; EG-NEXT: LSHL T0.W, T0.X, PV.Y,
+; EG-NEXT: LSHL T2.Z, T0.X, PS,
+; EG-NEXT: BIT_ALIGN_INT T0.W, PV.Y, PV.W, PV.Z,
; EG-NEXT: AND_INT * T1.W, T0.Z, literal.x,
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
-; EG-NEXT: CNDE_INT * T0.Y, PS, PV.Z, PV.W,
-; EG-NEXT: CNDE_INT T0.X, T1.W, T0.W, 0.0,
+; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, PV.Z,
+; EG-NEXT: CNDE_INT T0.X, T1.W, T2.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%b_ptr = getelementptr i64, ptr addrspace(1) %in, i64 1
@@ -858,8 +858,8 @@ define amdgpu_kernel void @shl_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 1 @6
-; EG-NEXT: ALU 22, @11, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.XYZW, T0.X, 1
+; EG-NEXT: ALU 23, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.XYZW, T0.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
@@ -868,27 +868,28 @@ define amdgpu_kernel void @shl_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in
; EG-NEXT: ALU clause starting at 10:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 11:
-; EG-NEXT: AND_INT T1.Y, T1.Z, literal.x,
+; EG-NEXT: AND_INT * T1.W, T1.Z, literal.x,
+; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
+; EG-NEXT: LSHL T2.X, T0.Z, PV.W,
+; EG-NEXT: AND_INT T1.Y, T1.Z, literal.x, BS:VEC_120/SCL_212
; EG-NEXT: LSHR T2.Z, T0.W, 1,
-; EG-NEXT: BIT_ALIGN_INT T0.W, T0.W, T0.Z, 1,
+; EG-NEXT: BIT_ALIGN_INT T0.W, T0.W, T0.Z, 1, BS:VEC_102/SCL_221
; EG-NEXT: NOT_INT * T1.W, T1.Z,
+; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
+; EG-NEXT: BIT_ALIGN_INT T3.X, PV.Z, PV.W, PS,
+; EG-NEXT: LSHR T2.Y, T0.Y, 1,
+; EG-NEXT: NOT_INT T0.Z, T1.X,
+; EG-NEXT: BIT_ALIGN_INT T0.W, T0.Y, T0.X, 1,
+; EG-NEXT: AND_INT * T1.W, T1.X, literal.x,
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT: BIT_ALIGN_INT T0.W, PV.Z, PV.W, PS,
-; EG-NEXT: LSHL * T1.W, T0.Z, PV.Y,
-; EG-NEXT: AND_INT T2.X, T1.Z, literal.x,
-; EG-NEXT: AND_INT T1.Y, T1.X, literal.y,
-; EG-NEXT: LSHR T0.Z, T0.Y, 1,
-; EG-NEXT: BIT_ALIGN_INT T2.W, T0.Y, T0.X, 1,
-; EG-NEXT: NOT_INT * T3.W, T1.X,
-; EG-NEXT: 32(4.484155e-44), 31(4.344025e-44)
-; EG-NEXT: BIT_ALIGN_INT T0.Y, PV.Z, PV.W, PS,
-; EG-NEXT: LSHL T0.Z, T0.X, PV.Y,
-; EG-NEXT: AND_INT T2.W, T1.X, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT: CNDE_INT * T3.W, PV.X, T0.W, T1.W,
+; EG-NEXT: LSHL T0.Y, T0.X, PS, BS:VEC_120/SCL_212
+; EG-NEXT: AND_INT T1.Z, T1.X, literal.x, BS:VEC_201
+; EG-NEXT: BIT_ALIGN_INT T0.W, PV.Y, PV.W, PV.Z,
+; EG-NEXT: CNDE_INT * T2.W, T1.Y, PV.X, T2.X,
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
-; EG-NEXT: CNDE_INT T3.Y, PV.W, PV.Y, PV.Z,
-; EG-NEXT: CNDE_INT * T3.Z, T2.X, T1.W, 0.0,
-; EG-NEXT: CNDE_INT T3.X, T2.W, T0.Z, 0.0,
+; EG-NEXT: CNDE_INT T2.Y, PV.Z, PV.W, PV.Y,
+; EG-NEXT: CNDE_INT * T2.Z, T1.Y, T2.X, 0.0,
+; EG-NEXT: CNDE_INT T2.X, T1.Z, T0.Y, 0.0,
; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%b_ptr = getelementptr <2 x i64>, ptr addrspace(1) %in, i64 1
@@ -955,65 +956,66 @@ define amdgpu_kernel void @shl_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @14, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 3 @6
-; EG-NEXT: ALU 47, @15, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T0.X, 1
+; EG-NEXT: ALU 48, @15, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.XYZW, T0.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.XYZW, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 48, #1
-; EG-NEXT: VTX_READ_128 T2.XYZW, T0.X, 0, #1
-; EG-NEXT: VTX_READ_128 T3.XYZW, T0.X, 32, #1
-; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 16, #1
+; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 32, #1
+; EG-NEXT: VTX_READ_128 T2.XYZW, T0.X, 48, #1
+; EG-NEXT: VTX_READ_128 T3.XYZW, T0.X, 16, #1
+; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 14:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 15:
-; EG-NEXT: AND_INT T4.Z, T1.Z, literal.x,
-; EG-NEXT: LSHR T1.W, T0.W, 1,
-; EG-NEXT: NOT_INT * T3.W, T1.Z,
+; EG-NEXT: AND_INT * T1.W, T1.Z, literal.x,
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT: BIT_ALIGN_INT T4.X, T0.W, T0.Z, 1,
-; EG-NEXT: AND_INT T1.Y, T3.Z, literal.x, BS:VEC_201
-; EG-NEXT: LSHR T5.Z, T2.W, 1, BS:VEC_120/SCL_212
-; EG-NEXT: BIT_ALIGN_INT T0.W, T2.W, T2.Z, 1, BS:VEC_102/SCL_221
-; EG-NEXT: NOT_INT * T2.W, T3.Z,
-; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT: BIT_ALIGN_INT T3.Y, PV.Z, PV.W, PS,
-; EG-NEXT: LSHL T2.Z, T2.Z, PV.Y,
-; EG-NEXT: BIT_ALIGN_INT T0.W, T1.W, PV.X, T3.W,
-; EG-NEXT: LSHL * T1.W, T0.Z, T4.Z,
+; EG-NEXT: LSHL * T1.W, T0.Z, PV.W,
; EG-NEXT: AND_INT T4.X, T1.Z, literal.x,
-; EG-NEXT: AND_INT T1.Y, T1.X, literal.y,
-; EG-NEXT: LSHR T0.Z, T0.Y, 1,
-; EG-NEXT: BIT_ALIGN_INT T2.W, T0.Y, T0.X, 1,
-; EG-NEXT: NOT_INT * T3.W, T1.X,
+; EG-NEXT: LSHR T1.Y, T3.W, 1,
+; EG-NEXT: NOT_INT T4.Z, T2.Z, BS:VEC_201
+; EG-NEXT: BIT_ALIGN_INT T2.W, T3.W, T3.Z, 1,
+; EG-NEXT: AND_INT * T3.W, T2.Z, literal.y,
; EG-NEXT: 32(4.484155e-44), 31(4.344025e-44)
-; EG-NEXT: AND_INT T5.X, T3.Z, literal.x,
-; EG-NEXT: BIT_ALIGN_INT T0.Y, PV.Z, PV.W, PS,
-; EG-NEXT: LSHL T0.Z, T0.X, PV.Y,
-; EG-NEXT: AND_INT T2.W, T1.X, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT: CNDE_INT * T4.W, PV.X, T0.W, T1.W,
+; EG-NEXT: LSHL T5.X, T3.Z, PS,
+; EG-NEXT: AND_INT T2.Y, T2.Z, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: BIT_ALIGN_INT T2.Z, PV.Y, PV.W, PV.Z,
+; EG-NEXT: LSHR T2.W, T3.Y, 1,
+; EG-NEXT: NOT_INT * T3.W, T2.X,
+; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
+; EG-NEXT: BIT_ALIGN_INT T6.X, T3.Y, T3.X, 1,
+; EG-NEXT: AND_INT T1.Y, T2.X, literal.x,
+; EG-NEXT: LSHR T3.Z, T0.W, 1,
+; EG-NEXT: BIT_ALIGN_INT T0.W, T0.W, T0.Z, 1,
+; EG-NEXT: NOT_INT * T4.W, T1.Z,
+; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
+; EG-NEXT: BIT_ALIGN_INT T7.X, PV.Z, PV.W, PS,
+; EG-NEXT: LSHL T1.Y, T3.X, PV.Y, BS:VEC_120/SCL_212
+; EG-NEXT: AND_INT T0.Z, T2.X, literal.x, BS:VEC_201
+; EG-NEXT: BIT_ALIGN_INT T0.W, T2.W, PV.X, T3.W,
+; EG-NEXT: CNDE_INT * T3.W, T2.Y, T2.Z, T5.X,
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T0.X, T3.X, literal.x,
-; EG-NEXT: CNDE_INT T4.Y, PV.W, PV.Y, PV.Z,
-; EG-NEXT: LSHR T1.Z, T2.Y, 1,
-; EG-NEXT: BIT_ALIGN_INT T0.W, T2.Y, T2.X, 1,
-; EG-NEXT: NOT_INT * T3.W, T3.X,
+; EG-NEXT: LSHR T2.X, T0.Y, 1,
+; EG-NEXT: CNDE_INT T3.Y, PV.Z, PV.W, PV.Y,
+; EG-NEXT: NOT_INT T1.Z, T1.X,
+; EG-NEXT: BIT_ALIGN_INT T0.W, T0.Y, T0.X, 1,
+; EG-NEXT: AND_INT * T2.W, T1.X, literal.x,
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT: BIT_ALIGN_INT T1.X, PV.Z, PV.W, PS,
-; EG-NEXT: LSHL T0.Y, T2.X, PV.X,
-; EG-NEXT: CNDE_INT T4.Z, T4.X, T1.W, 0.0, BS:VEC_120/SCL_212
-; EG-NEXT: AND_INT * T0.W, T3.X, literal.x, BS:VEC_201
+; EG-NEXT: LSHL T0.X, T0.X, PS,
+; EG-NEXT: AND_INT T0.Y, T1.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: CNDE_INT T3.Z, T2.Y, T5.X, 0.0, BS:VEC_021/SCL_122
+; EG-NEXT: BIT_ALIGN_INT * T0.W, PV.X, PV.W, PV.Z,
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
-; EG-NEXT: CNDE_INT * T1.W, T5.X, T3.Y, T2.Z,
-; EG-NEXT: CNDE_INT T4.X, T2.W, T0.Z, 0.0,
-; EG-NEXT: CNDE_INT T1.Y, T0.W, T1.X, T0.Y, BS:VEC_120/SCL_212
-; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.x,
+; EG-NEXT: CNDE_INT * T2.W, T4.X, T7.X, T1.W,
+; EG-NEXT: CNDE_INT T3.X, T0.Z, T1.Y, 0.0,
+; EG-NEXT: CNDE_INT T2.Y, T0.Y, T0.W, T0.X,
+; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR T0.X, PV.W, literal.x,
-; EG-NEXT: CNDE_INT T1.Z, T5.X, T2.Z, 0.0,
-; EG-NEXT: CNDE_INT * T1.X, T0.W, T0.Y, 0.0,
+; EG-NEXT: LSHR T1.X, PV.W, literal.x,
+; EG-NEXT: CNDE_INT T2.Z, T4.X, T1.W, 0.0,
+; EG-NEXT: CNDE_INT * T2.X, T0.Y, T0.X, 0.0,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT: LSHR * T2.X, KC0[2].Y, literal.x,
+; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%b_ptr = getelementptr <4 x i64>, ptr addrspace(1) %in, i64 1
%a = load <4 x i64>, ptr addrspace(1) %in
@@ -1172,17 +1174,17 @@ define amdgpu_kernel void @s_shl_constant_i64(ptr addrspace(1) %out, i64 %a) {
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 4:
-; EG-NEXT: AND_INT T0.Z, KC0[2].W, literal.x,
-; EG-NEXT: MOV T0.W, literal.y,
-; EG-NEXT: NOT_INT * T1.W, KC0[2].W,
-; EG-NEXT: 31(4.344025e-44), -1(nan)
-; EG-NEXT: BIT_ALIGN_INT T1.Z, literal.x, PV.W, PS,
-; EG-NEXT: LSHL T0.W, literal.y, PV.Z,
+; EG-NEXT: MOV T0.Z, literal.x,
+; EG-NEXT: NOT_INT T0.W, KC0[2].W,
+; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y,
+; EG-NEXT: -1(nan), 31(4.344025e-44)
+; EG-NEXT: LSHL T1.Z, literal.x, PS,
+; EG-NEXT: BIT_ALIGN_INT T0.W, literal.y, PV.Z, PV.W,
; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.z,
-; EG-NEXT: 32767(4.591635e-41), -1(nan)
+; EG-NEXT: -1(nan), 32767(4.591635e-41)
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
-; EG-NEXT: CNDE_INT * T0.Y, PS, PV.Z, PV.W,
-; EG-NEXT: CNDE_INT T0.X, T1.W, T0.W, 0.0,
+; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, PV.Z,
+; EG-NEXT: CNDE_INT T0.X, T1.W, T1.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 281474976710655, %a
@@ -1423,15 +1425,15 @@ define amdgpu_kernel void @s_shl_inline_imm_64_i64(ptr addrspace(1) %out, ptr ad
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 4:
-; EG-NEXT: NOT_INT T0.W, KC0[2].W,
-; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.x,
+; EG-NEXT: AND_INT T0.W, KC0[2].W, literal.x,
+; EG-NEXT: NOT_INT * T1.W, KC0[2].W,
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT: LSHL T0.Z, literal.x, PS,
-; EG-NEXT: BIT_ALIGN_INT T0.W, 0.0, literal.y, PV.W,
-; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y,
-; EG-NEXT: 64(8.968310e-44), 32(4.484155e-44)
-; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, PV.Z,
-; EG-NEXT: CNDE_INT T0.X, T1.W, T0.Z, 0.0,
+; EG-NEXT: BIT_ALIGN_INT T0.Z, 0.0, literal.x, PS,
+; EG-NEXT: AND_INT T1.W, KC0[2].W, literal.x,
+; EG-NEXT: LSHL * T0.W, literal.y, PV.W,
+; EG-NEXT: 32(4.484155e-44), 64(8.968310e-44)
+; EG-NEXT: CNDE_INT * T0.Y, PV.W, PV.Z, PS,
+; EG-NEXT: CNDE_INT T0.X, T1.W, T0.W, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 64, %a
@@ -1903,16 +1905,16 @@ define amdgpu_kernel void @s_shl_inline_imm_f32_4_0_i64(ptr addrspace(1) %out, p
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 4:
-; EG-NEXT: NOT_INT T0.W, KC0[2].W,
-; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.x,
+; EG-NEXT: AND_INT T0.W, KC0[2].W, literal.x,
+; EG-NEXT: NOT_INT * T1.W, KC0[2].W,
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT: LSHL T0.Z, literal.x, PS,
-; EG-NEXT: BIT_ALIGN_INT T0.W, 0.0, literal.y, PV.W,
-; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.z,
-; EG-NEXT: 1082130432(4.000000e+00), 541065216(1.626303e-19)
-; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
-; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, PV.Z,
-; EG-NEXT: CNDE_INT T0.X, T1.W, T0.Z, 0.0,
+; EG-NEXT: BIT_ALIGN_INT T0.Z, 0.0, literal.x, PS,
+; EG-NEXT: AND_INT T1.W, KC0[2].W, literal.y,
+; EG-NEXT: LSHL * T0.W, literal.z, PV.W,
+; EG-NEXT: 541065216(1.626303e-19), 32(4.484155e-44)
+; EG-NEXT: 1082130432(4.000000e+00), 0(0.000000e+00)
+; EG-NEXT: CNDE_INT * T0.Y, PV.W, PV.Z, PS,
+; EG-NEXT: CNDE_INT T0.X, T1.W, T0.W, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 1082130432, %a
@@ -1959,17 +1961,17 @@ define amdgpu_kernel void @s_shl_inline_imm_f32_neg_4_0_i64(ptr addrspace(1) %ou
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 4:
-; EG-NEXT: AND_INT T0.Z, KC0[2].W, literal.x,
-; EG-NEXT: MOV T0.W, literal.y,
-; EG-NEXT: NOT_INT * T1.W, KC0[2].W,
-; EG-NEXT: 31(4.344025e-44), -532676608(-5.534023e+19)
-; EG-NEXT: BIT_ALIGN_INT T1.Z, literal.x, PV.W, PS,
-; EG-NEXT: LSHL T0.W, literal.y, PV.Z,
+; EG-NEXT: MOV T0.Z, literal.x,
+; EG-NEXT: NOT_INT T0.W, KC0[2].W,
+; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y,
+; EG-NEXT: -532676608(-5.534023e+19), 31(4.344025e-44)
+; EG-NEXT: LSHL T1.Z, literal.x, PS,
+; EG-NEXT: BIT_ALIGN_INT T0.W, literal.y, PV.Z, PV.W,
; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.z,
-; EG-NEXT: 2147483647(nan), -1065353216(-4.000000e+00)
+; EG-NEXT: -1065353216(-4.000000e+00), 2147483647(nan)
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
-; EG-NEXT: CNDE_INT * T0.Y, PS, PV.Z, PV.W,
-; EG-NEXT: CNDE_INT T0.X, T1.W, T0.W, 0.0,
+; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, PV.Z,
+; EG-NEXT: CNDE_INT T0.X, T1.W, T1.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 -1065353216, %a
diff --git a/llvm/test/CodeGen/X86/misched-critical-path.ll b/llvm/test/CodeGen/X86/misched-critical-path.ll
new file mode 100644
index 0000000000000..909692aca2b0a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/misched-critical-path.ll
@@ -0,0 +1,240 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin8 -misched-print-dags -o - 2>&1 > /dev/null | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+@sc = common global i8 0
+@uc = common global i8 0
+@ss = common global i16 0
+@us = common global i16 0
+@si = common global i32 0
+@ui = common global i32 0
+@sl = common global i64 0
+@ul = common global i64 0
+@sll = common global i64 0
+@ull = common global i64 0
+
+; Regression Test for PR92368.
+;
+; CHECK: SU(75): CMP8rr %49:gr8, %48:gr8, implicit-def $eflags
+; CHECK: Predecessors:
+; CHECK-NEXT: SU(73): Data Latency=0 Reg=%49
+; CHECK-NEXT: SU(74): Out Latency=0
+; CHECK-NEXT: SU(72): Out Latency=0
+; CHECK-NEXT: SU(70): Data Latency=4 Reg=%48
+define void @misched_bug() nounwind {
+entry:
+ %0 = load i8, i8* @sc, align 1
+ %1 = zext i8 %0 to i32
+ %2 = load i8, i8* @uc, align 1
+ %3 = zext i8 %2 to i32
+ %4 = trunc i32 %3 to i8
+ %5 = trunc i32 %1 to i8
+ %pair6 = cmpxchg i8* @sc, i8 %4, i8 %5 monotonic monotonic
+ %6 = extractvalue { i8, i1 } %pair6, 0
+ store i8 %6, i8* @sc, align 1
+ %7 = load i8, i8* @sc, align 1
+ %8 = zext i8 %7 to i32
+ %9 = load i8, i8* @uc, align 1
+ %10 = zext i8 %9 to i32
+ %11 = trunc i32 %10 to i8
+ %12 = trunc i32 %8 to i8
+ %pair13 = cmpxchg i8* @uc, i8 %11, i8 %12 monotonic monotonic
+ %13 = extractvalue { i8, i1 } %pair13, 0
+ store i8 %13, i8* @uc, align 1
+ %14 = load i8, i8* @sc, align 1
+ %15 = sext i8 %14 to i16
+ %16 = zext i16 %15 to i32
+ %17 = load i8, i8* @uc, align 1
+ %18 = zext i8 %17 to i32
+ %19 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
+ %20 = trunc i32 %18 to i16
+ %21 = trunc i32 %16 to i16
+ %pair22 = cmpxchg i16* %19, i16 %20, i16 %21 monotonic monotonic
+ %22 = extractvalue { i16, i1 } %pair22, 0
+ store i16 %22, i16* @ss, align 2
+ %23 = load i8, i8* @sc, align 1
+ %24 = sext i8 %23 to i16
+ %25 = zext i16 %24 to i32
+ %26 = load i8, i8* @uc, align 1
+ %27 = zext i8 %26 to i32
+ %28 = bitcast i8* bitcast (i16* @us to i8*) to i16*
+ %29 = trunc i32 %27 to i16
+ %30 = trunc i32 %25 to i16
+ %pair31 = cmpxchg i16* %28, i16 %29, i16 %30 monotonic monotonic
+ %31 = extractvalue { i16, i1 } %pair31, 0
+ store i16 %31, i16* @us, align 2
+ %32 = load i8, i8* @sc, align 1
+ %33 = sext i8 %32 to i32
+ %34 = load i8, i8* @uc, align 1
+ %35 = zext i8 %34 to i32
+ %36 = bitcast i8* bitcast (i32* @si to i8*) to i32*
+ %pair37 = cmpxchg i32* %36, i32 %35, i32 %33 monotonic monotonic
+ %37 = extractvalue { i32, i1 } %pair37, 0
+ store i32 %37, i32* @si, align 4
+ %38 = load i8, i8* @sc, align 1
+ %39 = sext i8 %38 to i32
+ %40 = load i8, i8* @uc, align 1
+ %41 = zext i8 %40 to i32
+ %42 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
+ %pair43 = cmpxchg i32* %42, i32 %41, i32 %39 monotonic monotonic
+ %43 = extractvalue { i32, i1 } %pair43, 0
+ store i32 %43, i32* @ui, align 4
+ %44 = load i8, i8* @sc, align 1
+ %45 = sext i8 %44 to i64
+ %46 = load i8, i8* @uc, align 1
+ %47 = zext i8 %46 to i64
+ %48 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
+ %pair49 = cmpxchg i64* %48, i64 %47, i64 %45 monotonic monotonic
+ %49 = extractvalue { i64, i1 } %pair49, 0
+ store i64 %49, i64* @sl, align 8
+ %50 = load i8, i8* @sc, align 1
+ %51 = sext i8 %50 to i64
+ %52 = load i8, i8* @uc, align 1
+ %53 = zext i8 %52 to i64
+ %54 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
+ %pair55 = cmpxchg i64* %54, i64 %53, i64 %51 monotonic monotonic
+ %55 = extractvalue { i64, i1 } %pair55, 0
+ store i64 %55, i64* @ul, align 8
+ %56 = load i8, i8* @sc, align 1
+ %57 = sext i8 %56 to i64
+ %58 = load i8, i8* @uc, align 1
+ %59 = zext i8 %58 to i64
+ %60 = bitcast i8* bitcast (i64* @sll to i8*) to i64*
+ %pair61 = cmpxchg i64* %60, i64 %59, i64 %57 monotonic monotonic
+ %61 = extractvalue { i64, i1 } %pair61, 0
+ store i64 %61, i64* @sll, align 8
+ %62 = load i8, i8* @sc, align 1
+ %63 = sext i8 %62 to i64
+ %64 = load i8, i8* @uc, align 1
+ %65 = zext i8 %64 to i64
+ %66 = bitcast i8* bitcast (i64* @ull to i8*) to i64*
+ %pair67 = cmpxchg i64* %66, i64 %65, i64 %63 monotonic monotonic
+ %67 = extractvalue { i64, i1 } %pair67, 0
+ store i64 %67, i64* @ull, align 8
+ %68 = load i8, i8* @sc, align 1
+ %69 = zext i8 %68 to i32
+ %70 = load i8, i8* @uc, align 1
+ %71 = zext i8 %70 to i32
+ %72 = trunc i32 %71 to i8
+ %73 = trunc i32 %69 to i8
+ %pair74 = cmpxchg i8* @sc, i8 %72, i8 %73 monotonic monotonic
+ %74 = extractvalue { i8, i1 } %pair74, 0
+ %75 = icmp eq i8 %74, %72
+ %76 = zext i1 %75 to i8
+ %77 = zext i8 %76 to i32
+ store i32 %77, i32* @ui, align 4
+ %78 = load i8, i8* @sc, align 1
+ %79 = zext i8 %78 to i32
+ %80 = load i8, i8* @uc, align 1
+ %81 = zext i8 %80 to i32
+ %82 = trunc i32 %81 to i8
+ %83 = trunc i32 %79 to i8
+ %pair84 = cmpxchg i8* @uc, i8 %82, i8 %83 monotonic monotonic
+ %84 = extractvalue { i8, i1 } %pair84, 0
+ %85 = icmp eq i8 %84, %82
+ %86 = zext i1 %85 to i8
+ %87 = zext i8 %86 to i32
+ store i32 %87, i32* @ui, align 4
+ %88 = load i8, i8* @sc, align 1
+ %89 = sext i8 %88 to i16
+ %90 = zext i16 %89 to i32
+ %91 = load i8, i8* @uc, align 1
+ %92 = zext i8 %91 to i32
+ %93 = trunc i32 %92 to i8
+ %94 = trunc i32 %90 to i8
+ %pair95 = cmpxchg i8* bitcast (i16* @ss to i8*), i8 %93, i8 %94 monotonic monotonic
+ %95 = extractvalue { i8, i1 } %pair95, 0
+ %96 = icmp eq i8 %95, %93
+ %97 = zext i1 %96 to i8
+ %98 = zext i8 %97 to i32
+ store i32 %98, i32* @ui, align 4
+ %99 = load i8, i8* @sc, align 1
+ %100 = sext i8 %99 to i16
+ %101 = zext i16 %100 to i32
+ %102 = load i8, i8* @uc, align 1
+ %103 = zext i8 %102 to i32
+ %104 = trunc i32 %103 to i8
+ %105 = trunc i32 %101 to i8
+ %pair106 = cmpxchg i8* bitcast (i16* @us to i8*), i8 %104, i8 %105 monotonic monotonic
+ %106 = extractvalue { i8, i1 } %pair106, 0
+ %107 = icmp eq i8 %106, %104
+ %108 = zext i1 %107 to i8
+ %109 = zext i8 %108 to i32
+ store i32 %109, i32* @ui, align 4
+ %110 = load i8, i8* @sc, align 1
+ %111 = sext i8 %110 to i32
+ %112 = load i8, i8* @uc, align 1
+ %113 = zext i8 %112 to i32
+ %114 = trunc i32 %113 to i8
+ %115 = trunc i32 %111 to i8
+ %pair116 = cmpxchg i8* bitcast (i32* @si to i8*), i8 %114, i8 %115 monotonic monotonic
+ %116 = extractvalue { i8, i1 } %pair116, 0
+ %117 = icmp eq i8 %116, %114
+ %118 = zext i1 %117 to i8
+ %119 = zext i8 %118 to i32
+ store i32 %119, i32* @ui, align 4
+ %120 = load i8, i8* @sc, align 1
+ %121 = sext i8 %120 to i32
+ %122 = load i8, i8* @uc, align 1
+ %123 = zext i8 %122 to i32
+ %124 = trunc i32 %123 to i8
+ %125 = trunc i32 %121 to i8
+ %pair126 = cmpxchg i8* bitcast (i32* @ui to i8*), i8 %124, i8 %125 monotonic monotonic
+ %126 = extractvalue { i8, i1 } %pair126, 0
+ %127 = icmp eq i8 %126, %124
+ %128 = zext i1 %127 to i8
+ %129 = zext i8 %128 to i32
+ store i32 %129, i32* @ui, align 4
+ %130 = load i8, i8* @sc, align 1
+ %131 = sext i8 %130 to i64
+ %132 = load i8, i8* @uc, align 1
+ %133 = zext i8 %132 to i64
+ %134 = trunc i64 %133 to i8
+ %135 = trunc i64 %131 to i8
+ %pair136 = cmpxchg i8* bitcast (i64* @sl to i8*), i8 %134, i8 %135 monotonic monotonic
+ %136 = extractvalue { i8, i1 } %pair136, 0
+ %137 = icmp eq i8 %136, %134
+ %138 = zext i1 %137 to i8
+ %139 = zext i8 %138 to i32
+ store i32 %139, i32* @ui, align 4
+ %140 = load i8, i8* @sc, align 1
+ %141 = sext i8 %140 to i64
+ %142 = load i8, i8* @uc, align 1
+ %143 = zext i8 %142 to i64
+ %144 = trunc i64 %143 to i8
+ %145 = trunc i64 %141 to i8
+ %pair146 = cmpxchg i8* bitcast (i64* @ul to i8*), i8 %144, i8 %145 monotonic monotonic
+ %146 = extractvalue { i8, i1 } %pair146, 0
+ %147 = icmp eq i8 %146, %144
+ %148 = zext i1 %147 to i8
+ %149 = zext i8 %148 to i32
+ store i32 %149, i32* @ui, align 4
+ %150 = load i8, i8* @sc, align 1
+ %151 = sext i8 %150 to i64
+ %152 = load i8, i8* @uc, align 1
+ %153 = zext i8 %152 to i64
+ %154 = trunc i64 %153 to i8
+ %155 = trunc i64 %151 to i8
+ %pair156 = cmpxchg i8* bitcast (i64* @sll to i8*), i8 %154, i8 %155 monotonic monotonic
+ %156 = extractvalue { i8, i1 } %pair156, 0
+ %157 = icmp eq i8 %156, %154
+ %158 = zext i1 %157 to i8
+ %159 = zext i8 %158 to i32
+ store i32 %159, i32* @ui, align 4
+ %160 = load i8, i8* @sc, align 1
+ %161 = sext i8 %160 to i64
+ %162 = load i8, i8* @uc, align 1
+ %163 = zext i8 %162 to i64
+ %164 = trunc i64 %163 to i8
+ %165 = trunc i64 %161 to i8
+ %pair166 = cmpxchg i8* bitcast (i64* @ull to i8*), i8 %164, i8 %165 monotonic monotonic
+ %166 = extractvalue { i8, i1 } %pair166, 0
+ %167 = icmp eq i8 %166, %164
+ %168 = zext i1 %167 to i8
+ %169 = zext i8 %168 to i32
+ store i32 %169, i32* @ui, align 4
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
>From 2d83b289fcb0c80370bc775d3ef9fd1354105f0d Mon Sep 17 00:00:00 2001
From: csstormq <swust_xiaoqiangxu at 163.com>
Date: Wed, 22 May 2024 17:32:08 +0800
Subject: [PATCH 2/2] Fix test case
---
.../test/CodeGen/X86/misched-critical-path.ll | 416 +++++++++---------
1 file changed, 205 insertions(+), 211 deletions(-)
diff --git a/llvm/test/CodeGen/X86/misched-critical-path.ll b/llvm/test/CodeGen/X86/misched-critical-path.ll
index 909692aca2b0a..7db1257fb7381 100644
--- a/llvm/test/CodeGen/X86/misched-critical-path.ll
+++ b/llvm/test/CodeGen/X86/misched-critical-path.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin8 -misched-print-dags -o - 2>&1 > /dev/null | FileCheck %s
+; REQUIRES: asserts
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
@@ -23,218 +24,211 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
; CHECK-NEXT: SU(70): Data Latency=4 Reg=%48
define void @misched_bug() nounwind {
entry:
- %0 = load i8, i8* @sc, align 1
- %1 = zext i8 %0 to i32
- %2 = load i8, i8* @uc, align 1
- %3 = zext i8 %2 to i32
- %4 = trunc i32 %3 to i8
- %5 = trunc i32 %1 to i8
- %pair6 = cmpxchg i8* @sc, i8 %4, i8 %5 monotonic monotonic
- %6 = extractvalue { i8, i1 } %pair6, 0
- store i8 %6, i8* @sc, align 1
- %7 = load i8, i8* @sc, align 1
- %8 = zext i8 %7 to i32
- %9 = load i8, i8* @uc, align 1
- %10 = zext i8 %9 to i32
- %11 = trunc i32 %10 to i8
- %12 = trunc i32 %8 to i8
- %pair13 = cmpxchg i8* @uc, i8 %11, i8 %12 monotonic monotonic
- %13 = extractvalue { i8, i1 } %pair13, 0
- store i8 %13, i8* @uc, align 1
- %14 = load i8, i8* @sc, align 1
- %15 = sext i8 %14 to i16
- %16 = zext i16 %15 to i32
- %17 = load i8, i8* @uc, align 1
- %18 = zext i8 %17 to i32
- %19 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
- %20 = trunc i32 %18 to i16
- %21 = trunc i32 %16 to i16
- %pair22 = cmpxchg i16* %19, i16 %20, i16 %21 monotonic monotonic
- %22 = extractvalue { i16, i1 } %pair22, 0
- store i16 %22, i16* @ss, align 2
- %23 = load i8, i8* @sc, align 1
- %24 = sext i8 %23 to i16
- %25 = zext i16 %24 to i32
- %26 = load i8, i8* @uc, align 1
- %27 = zext i8 %26 to i32
- %28 = bitcast i8* bitcast (i16* @us to i8*) to i16*
- %29 = trunc i32 %27 to i16
- %30 = trunc i32 %25 to i16
- %pair31 = cmpxchg i16* %28, i16 %29, i16 %30 monotonic monotonic
- %31 = extractvalue { i16, i1 } %pair31, 0
- store i16 %31, i16* @us, align 2
- %32 = load i8, i8* @sc, align 1
- %33 = sext i8 %32 to i32
- %34 = load i8, i8* @uc, align 1
- %35 = zext i8 %34 to i32
- %36 = bitcast i8* bitcast (i32* @si to i8*) to i32*
- %pair37 = cmpxchg i32* %36, i32 %35, i32 %33 monotonic monotonic
- %37 = extractvalue { i32, i1 } %pair37, 0
- store i32 %37, i32* @si, align 4
- %38 = load i8, i8* @sc, align 1
- %39 = sext i8 %38 to i32
- %40 = load i8, i8* @uc, align 1
- %41 = zext i8 %40 to i32
- %42 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
- %pair43 = cmpxchg i32* %42, i32 %41, i32 %39 monotonic monotonic
- %43 = extractvalue { i32, i1 } %pair43, 0
- store i32 %43, i32* @ui, align 4
- %44 = load i8, i8* @sc, align 1
- %45 = sext i8 %44 to i64
- %46 = load i8, i8* @uc, align 1
- %47 = zext i8 %46 to i64
- %48 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
- %pair49 = cmpxchg i64* %48, i64 %47, i64 %45 monotonic monotonic
- %49 = extractvalue { i64, i1 } %pair49, 0
- store i64 %49, i64* @sl, align 8
- %50 = load i8, i8* @sc, align 1
- %51 = sext i8 %50 to i64
- %52 = load i8, i8* @uc, align 1
- %53 = zext i8 %52 to i64
- %54 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
- %pair55 = cmpxchg i64* %54, i64 %53, i64 %51 monotonic monotonic
- %55 = extractvalue { i64, i1 } %pair55, 0
- store i64 %55, i64* @ul, align 8
- %56 = load i8, i8* @sc, align 1
- %57 = sext i8 %56 to i64
- %58 = load i8, i8* @uc, align 1
- %59 = zext i8 %58 to i64
- %60 = bitcast i8* bitcast (i64* @sll to i8*) to i64*
- %pair61 = cmpxchg i64* %60, i64 %59, i64 %57 monotonic monotonic
- %61 = extractvalue { i64, i1 } %pair61, 0
- store i64 %61, i64* @sll, align 8
- %62 = load i8, i8* @sc, align 1
- %63 = sext i8 %62 to i64
- %64 = load i8, i8* @uc, align 1
- %65 = zext i8 %64 to i64
- %66 = bitcast i8* bitcast (i64* @ull to i8*) to i64*
- %pair67 = cmpxchg i64* %66, i64 %65, i64 %63 monotonic monotonic
- %67 = extractvalue { i64, i1 } %pair67, 0
- store i64 %67, i64* @ull, align 8
- %68 = load i8, i8* @sc, align 1
- %69 = zext i8 %68 to i32
- %70 = load i8, i8* @uc, align 1
- %71 = zext i8 %70 to i32
- %72 = trunc i32 %71 to i8
- %73 = trunc i32 %69 to i8
- %pair74 = cmpxchg i8* @sc, i8 %72, i8 %73 monotonic monotonic
- %74 = extractvalue { i8, i1 } %pair74, 0
- %75 = icmp eq i8 %74, %72
- %76 = zext i1 %75 to i8
- %77 = zext i8 %76 to i32
- store i32 %77, i32* @ui, align 4
- %78 = load i8, i8* @sc, align 1
- %79 = zext i8 %78 to i32
- %80 = load i8, i8* @uc, align 1
- %81 = zext i8 %80 to i32
- %82 = trunc i32 %81 to i8
- %83 = trunc i32 %79 to i8
- %pair84 = cmpxchg i8* @uc, i8 %82, i8 %83 monotonic monotonic
- %84 = extractvalue { i8, i1 } %pair84, 0
- %85 = icmp eq i8 %84, %82
- %86 = zext i1 %85 to i8
- %87 = zext i8 %86 to i32
- store i32 %87, i32* @ui, align 4
- %88 = load i8, i8* @sc, align 1
- %89 = sext i8 %88 to i16
- %90 = zext i16 %89 to i32
- %91 = load i8, i8* @uc, align 1
- %92 = zext i8 %91 to i32
- %93 = trunc i32 %92 to i8
- %94 = trunc i32 %90 to i8
- %pair95 = cmpxchg i8* bitcast (i16* @ss to i8*), i8 %93, i8 %94 monotonic monotonic
- %95 = extractvalue { i8, i1 } %pair95, 0
- %96 = icmp eq i8 %95, %93
- %97 = zext i1 %96 to i8
- %98 = zext i8 %97 to i32
- store i32 %98, i32* @ui, align 4
- %99 = load i8, i8* @sc, align 1
- %100 = sext i8 %99 to i16
- %101 = zext i16 %100 to i32
- %102 = load i8, i8* @uc, align 1
- %103 = zext i8 %102 to i32
- %104 = trunc i32 %103 to i8
- %105 = trunc i32 %101 to i8
- %pair106 = cmpxchg i8* bitcast (i16* @us to i8*), i8 %104, i8 %105 monotonic monotonic
- %106 = extractvalue { i8, i1 } %pair106, 0
- %107 = icmp eq i8 %106, %104
- %108 = zext i1 %107 to i8
- %109 = zext i8 %108 to i32
- store i32 %109, i32* @ui, align 4
- %110 = load i8, i8* @sc, align 1
- %111 = sext i8 %110 to i32
- %112 = load i8, i8* @uc, align 1
- %113 = zext i8 %112 to i32
- %114 = trunc i32 %113 to i8
- %115 = trunc i32 %111 to i8
- %pair116 = cmpxchg i8* bitcast (i32* @si to i8*), i8 %114, i8 %115 monotonic monotonic
- %116 = extractvalue { i8, i1 } %pair116, 0
- %117 = icmp eq i8 %116, %114
- %118 = zext i1 %117 to i8
- %119 = zext i8 %118 to i32
- store i32 %119, i32* @ui, align 4
- %120 = load i8, i8* @sc, align 1
- %121 = sext i8 %120 to i32
- %122 = load i8, i8* @uc, align 1
- %123 = zext i8 %122 to i32
- %124 = trunc i32 %123 to i8
- %125 = trunc i32 %121 to i8
- %pair126 = cmpxchg i8* bitcast (i32* @ui to i8*), i8 %124, i8 %125 monotonic monotonic
- %126 = extractvalue { i8, i1 } %pair126, 0
- %127 = icmp eq i8 %126, %124
- %128 = zext i1 %127 to i8
- %129 = zext i8 %128 to i32
- store i32 %129, i32* @ui, align 4
- %130 = load i8, i8* @sc, align 1
- %131 = sext i8 %130 to i64
- %132 = load i8, i8* @uc, align 1
- %133 = zext i8 %132 to i64
- %134 = trunc i64 %133 to i8
- %135 = trunc i64 %131 to i8
- %pair136 = cmpxchg i8* bitcast (i64* @sl to i8*), i8 %134, i8 %135 monotonic monotonic
- %136 = extractvalue { i8, i1 } %pair136, 0
- %137 = icmp eq i8 %136, %134
- %138 = zext i1 %137 to i8
- %139 = zext i8 %138 to i32
- store i32 %139, i32* @ui, align 4
- %140 = load i8, i8* @sc, align 1
- %141 = sext i8 %140 to i64
- %142 = load i8, i8* @uc, align 1
- %143 = zext i8 %142 to i64
- %144 = trunc i64 %143 to i8
- %145 = trunc i64 %141 to i8
- %pair146 = cmpxchg i8* bitcast (i64* @ul to i8*), i8 %144, i8 %145 monotonic monotonic
- %146 = extractvalue { i8, i1 } %pair146, 0
- %147 = icmp eq i8 %146, %144
- %148 = zext i1 %147 to i8
- %149 = zext i8 %148 to i32
- store i32 %149, i32* @ui, align 4
- %150 = load i8, i8* @sc, align 1
- %151 = sext i8 %150 to i64
- %152 = load i8, i8* @uc, align 1
- %153 = zext i8 %152 to i64
- %154 = trunc i64 %153 to i8
- %155 = trunc i64 %151 to i8
- %pair156 = cmpxchg i8* bitcast (i64* @sll to i8*), i8 %154, i8 %155 monotonic monotonic
- %156 = extractvalue { i8, i1 } %pair156, 0
- %157 = icmp eq i8 %156, %154
- %158 = zext i1 %157 to i8
- %159 = zext i8 %158 to i32
- store i32 %159, i32* @ui, align 4
- %160 = load i8, i8* @sc, align 1
- %161 = sext i8 %160 to i64
- %162 = load i8, i8* @uc, align 1
- %163 = zext i8 %162 to i64
- %164 = trunc i64 %163 to i8
- %165 = trunc i64 %161 to i8
- %pair166 = cmpxchg i8* bitcast (i64* @ull to i8*), i8 %164, i8 %165 monotonic monotonic
- %166 = extractvalue { i8, i1 } %pair166, 0
- %167 = icmp eq i8 %166, %164
- %168 = zext i1 %167 to i8
- %169 = zext i8 %168 to i32
- store i32 %169, i32* @ui, align 4
+ %v0 = load i8, ptr @sc, align 1
+ %v1 = zext i8 %v0 to i32
+ %v2 = load i8, ptr @uc, align 1
+ %v3 = zext i8 %v2 to i32
+ %v4 = trunc i32 %v3 to i8
+ %v5 = trunc i32 %v1 to i8
+ %pair6 = cmpxchg ptr @sc, i8 %v4, i8 %v5 monotonic monotonic
+ %v6 = extractvalue { i8, i1 } %pair6, 0
+ store i8 %v6, ptr @sc, align 1
+ %v7 = load i8, ptr @sc, align 1
+ %v8 = zext i8 %v7 to i32
+ %v9 = load i8, ptr @uc, align 1
+ %v10 = zext i8 %v9 to i32
+ %v11 = trunc i32 %v10 to i8
+ %v12 = trunc i32 %v8 to i8
+ %pair13 = cmpxchg ptr @uc, i8 %v11, i8 %v12 monotonic monotonic
+ %v13 = extractvalue { i8, i1 } %pair13, 0
+ store i8 %v13, ptr @uc, align 1
+ %v14 = load i8, ptr @sc, align 1
+ %v15 = sext i8 %v14 to i16
+ %v16 = zext i16 %v15 to i32
+ %v17 = load i8, ptr @uc, align 1
+ %v18 = zext i8 %v17 to i32
+ %v20 = trunc i32 %v18 to i16
+ %v21 = trunc i32 %v16 to i16
+ %pair22 = cmpxchg ptr @ss, i16 %v20, i16 %v21 monotonic monotonic
+ %v22 = extractvalue { i16, i1 } %pair22, 0
+ store i16 %v22, ptr @ss, align 2
+ %v23 = load i8, ptr @sc, align 1
+ %v24 = sext i8 %v23 to i16
+ %v25 = zext i16 %v24 to i32
+ %v26 = load i8, ptr @uc, align 1
+ %v27 = zext i8 %v26 to i32
+ %v29 = trunc i32 %v27 to i16
+ %v30 = trunc i32 %v25 to i16
+ %pair31 = cmpxchg ptr @us, i16 %v29, i16 %v30 monotonic monotonic
+ %v31 = extractvalue { i16, i1 } %pair31, 0
+ store i16 %v31, ptr @us, align 2
+ %v32 = load i8, ptr @sc, align 1
+ %v33 = sext i8 %v32 to i32
+ %v34 = load i8, ptr @uc, align 1
+ %v35 = zext i8 %v34 to i32
+ %pair37 = cmpxchg ptr @si, i32 %v35, i32 %v33 monotonic monotonic
+ %v37 = extractvalue { i32, i1 } %pair37, 0
+ store i32 %v37, ptr @si, align 4
+ %v38 = load i8, ptr @sc, align 1
+ %v39 = sext i8 %v38 to i32
+ %v40 = load i8, ptr @uc, align 1
+ %v41 = zext i8 %v40 to i32
+ %pair43 = cmpxchg ptr @ui, i32 %v41, i32 %v39 monotonic monotonic
+ %v43 = extractvalue { i32, i1 } %pair43, 0
+ store i32 %v43, ptr @ui, align 4
+ %v44 = load i8, ptr @sc, align 1
+ %v45 = sext i8 %v44 to i64
+ %v46 = load i8, ptr @uc, align 1
+ %v47 = zext i8 %v46 to i64
+ %pair49 = cmpxchg ptr @sl, i64 %v47, i64 %v45 monotonic monotonic
+ %v49 = extractvalue { i64, i1 } %pair49, 0
+ store i64 %v49, ptr @sl, align 8
+ %v50 = load i8, ptr @sc, align 1
+ %v51 = sext i8 %v50 to i64
+ %v52 = load i8, ptr @uc, align 1
+ %v53 = zext i8 %v52 to i64
+ %pair55 = cmpxchg ptr @ul, i64 %v53, i64 %v51 monotonic monotonic
+ %v55 = extractvalue { i64, i1 } %pair55, 0
+ store i64 %v55, ptr @ul, align 8
+ %v56 = load i8, ptr @sc, align 1
+ %v57 = sext i8 %v56 to i64
+ %v58 = load i8, ptr @uc, align 1
+ %v59 = zext i8 %v58 to i64
+ %pair61 = cmpxchg ptr @sll, i64 %v59, i64 %v57 monotonic monotonic
+ %v61 = extractvalue { i64, i1 } %pair61, 0
+ store i64 %v61, ptr @sll, align 8
+ %v62 = load i8, ptr @sc, align 1
+ %v63 = sext i8 %v62 to i64
+ %v64 = load i8, ptr @uc, align 1
+ %v65 = zext i8 %v64 to i64
+ %pair67 = cmpxchg ptr @ull, i64 %v65, i64 %v63 monotonic monotonic
+ %v67 = extractvalue { i64, i1 } %pair67, 0
+ store i64 %v67, ptr @ull, align 8
+ %v68 = load i8, ptr @sc, align 1
+ %v69 = zext i8 %v68 to i32
+ %v70 = load i8, ptr @uc, align 1
+ %v71 = zext i8 %v70 to i32
+ %v72 = trunc i32 %v71 to i8
+ %v73 = trunc i32 %v69 to i8
+ %pair74 = cmpxchg ptr @sc, i8 %v72, i8 %v73 monotonic monotonic
+ %v74 = extractvalue { i8, i1 } %pair74, 0
+ %v75 = icmp eq i8 %v74, %v72
+ %v76 = zext i1 %v75 to i8
+ %v77 = zext i8 %v76 to i32
+ store i32 %v77, ptr @ui, align 4
+ %v78 = load i8, ptr @sc, align 1
+ %v79 = zext i8 %v78 to i32
+ %v80 = load i8, ptr @uc, align 1
+ %v81 = zext i8 %v80 to i32
+ %v82 = trunc i32 %v81 to i8
+ %v83 = trunc i32 %v79 to i8
+ %pair84 = cmpxchg ptr @uc, i8 %v82, i8 %v83 monotonic monotonic
+ %v84 = extractvalue { i8, i1 } %pair84, 0
+ %v85 = icmp eq i8 %v84, %v82
+ %v86 = zext i1 %v85 to i8
+ %v87 = zext i8 %v86 to i32
+ store i32 %v87, ptr @ui, align 4
+ %v88 = load i8, ptr @sc, align 1
+ %v89 = sext i8 %v88 to i16
+ %v90 = zext i16 %v89 to i32
+ %v91 = load i8, ptr @uc, align 1
+ %v92 = zext i8 %v91 to i32
+ %v93 = trunc i32 %v92 to i8
+ %v94 = trunc i32 %v90 to i8
+ %pair95 = cmpxchg ptr @ss, i8 %v93, i8 %v94 monotonic monotonic
+ %v95 = extractvalue { i8, i1 } %pair95, 0
+ %v96 = icmp eq i8 %v95, %v93
+ %v97 = zext i1 %v96 to i8
+ %v98 = zext i8 %v97 to i32
+ store i32 %v98, ptr @ui, align 4
+ %v99 = load i8, ptr @sc, align 1
+ %v100 = sext i8 %v99 to i16
+ %v101 = zext i16 %v100 to i32
+ %v102 = load i8, ptr @uc, align 1
+ %v103 = zext i8 %v102 to i32
+ %v104 = trunc i32 %v103 to i8
+ %v105 = trunc i32 %v101 to i8
+ %pair106 = cmpxchg ptr @us, i8 %v104, i8 %v105 monotonic monotonic
+ %v106 = extractvalue { i8, i1 } %pair106, 0
+ %v107 = icmp eq i8 %v106, %v104
+ %v108 = zext i1 %v107 to i8
+ %v109 = zext i8 %v108 to i32
+ store i32 %v109, ptr @ui, align 4
+ %v110 = load i8, ptr @sc, align 1
+ %v111 = sext i8 %v110 to i32
+ %v112 = load i8, ptr @uc, align 1
+ %v113 = zext i8 %v112 to i32
+ %v114 = trunc i32 %v113 to i8
+ %v115 = trunc i32 %v111 to i8
+ %pair116 = cmpxchg ptr @si, i8 %v114, i8 %v115 monotonic monotonic
+ %v116 = extractvalue { i8, i1 } %pair116, 0
+ %v117 = icmp eq i8 %v116, %v114
+ %v118 = zext i1 %v117 to i8
+ %v119 = zext i8 %v118 to i32
+ store i32 %v119, ptr @ui, align 4
+ %v120 = load i8, ptr @sc, align 1
+ %v121 = sext i8 %v120 to i32
+ %v122 = load i8, ptr @uc, align 1
+ %v123 = zext i8 %v122 to i32
+ %v124 = trunc i32 %v123 to i8
+ %v125 = trunc i32 %v121 to i8
+ %pair126 = cmpxchg ptr @ui, i8 %v124, i8 %v125 monotonic monotonic
+ %v126 = extractvalue { i8, i1 } %pair126, 0
+ %v127 = icmp eq i8 %v126, %v124
+ %v128 = zext i1 %v127 to i8
+ %v129 = zext i8 %v128 to i32
+ store i32 %v129, ptr @ui, align 4
+ %v130 = load i8, ptr @sc, align 1
+ %v131 = sext i8 %v130 to i64
+ %v132 = load i8, ptr @uc, align 1
+ %v133 = zext i8 %v132 to i64
+ %v134 = trunc i64 %v133 to i8
+ %v135 = trunc i64 %v131 to i8
+ %pair136 = cmpxchg ptr @sl, i8 %v134, i8 %v135 monotonic monotonic
+ %v136 = extractvalue { i8, i1 } %pair136, 0
+ %v137 = icmp eq i8 %v136, %v134
+ %v138 = zext i1 %v137 to i8
+ %v139 = zext i8 %v138 to i32
+ store i32 %v139, ptr @ui, align 4
+ %v140 = load i8, ptr @sc, align 1
+ %v141 = sext i8 %v140 to i64
+ %v142 = load i8, ptr @uc, align 1
+ %v143 = zext i8 %v142 to i64
+ %v144 = trunc i64 %v143 to i8
+ %v145 = trunc i64 %v141 to i8
+ %pair146 = cmpxchg ptr @ul, i8 %v144, i8 %v145 monotonic monotonic
+ %v146 = extractvalue { i8, i1 } %pair146, 0
+ %v147 = icmp eq i8 %v146, %v144
+ %v148 = zext i1 %v147 to i8
+ %v149 = zext i8 %v148 to i32
+ store i32 %v149, ptr @ui, align 4
+ %v150 = load i8, ptr @sc, align 1
+ %v151 = sext i8 %v150 to i64
+ %v152 = load i8, ptr @uc, align 1
+ %v153 = zext i8 %v152 to i64
+ %v154 = trunc i64 %v153 to i8
+ %v155 = trunc i64 %v151 to i8
+ %pair156 = cmpxchg ptr @sll, i8 %v154, i8 %v155 monotonic monotonic
+ %v156 = extractvalue { i8, i1 } %pair156, 0
+ %v157 = icmp eq i8 %v156, %v154
+ %v158 = zext i1 %v157 to i8
+ %v159 = zext i8 %v158 to i32
+ store i32 %v159, ptr @ui, align 4
+ %v160 = load i8, ptr @sc, align 1
+ %v161 = sext i8 %v160 to i64
+ %v162 = load i8, ptr @uc, align 1
+ %v163 = zext i8 %v162 to i64
+ %v164 = trunc i64 %v163 to i8
+ %v165 = trunc i64 %v161 to i8
+ %pair166 = cmpxchg ptr @ull, i8 %v164, i8 %v165 monotonic monotonic
+ %v166 = extractvalue { i8, i1 } %pair166, 0
+ %v167 = icmp eq i8 %v166, %v164
+ %v168 = zext i1 %v167 to i8
+ %v169 = zext i8 %v168 to i32
+ store i32 %v169, ptr @ui, align 4
br label %return
-return: ; preds = %entry
+return: ; preds = %entry
ret void
}
+
More information about the llvm-commits
mailing list