[llvm] [SelectionDAG] Added target hook check for SelectWithConstant (PR #127599)

Vikash Gupta via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 20 00:55:56 PST 2025


https://github.com/vg0204 updated https://github.com/llvm/llvm-project/pull/127599

>From 71dbc407269dad47a2aa19d7f02aee2f5bc68c96 Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Thu, 20 Feb 2025 14:23:19 +0530
Subject: [PATCH] [SelectionDAG] Added target hook check for SelectWithConstant

This patch adds a check of the convertSelectOfConstantsToMath() target
hook to the SimplifySelectCC helper combine function in the SelectionDAG
DAGCombiner. The generic fold there, which turns a select between
constants into a simple math operation on the condition value, is now
performed only when the target hook allows it.
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |    3 +-
 llvm/test/CodeGen/AArch64/bfis-in-loop.ll     |   78 +-
 llvm/test/CodeGen/AArch64/select_cc.ll        |   16 +-
 llvm/test/CodeGen/AArch64/selectopt-const.ll  |   16 +-
 .../CodeGen/AMDGPU/amdgcn.private-memory.ll   |    4 +-
 .../amdgpu-simplify-libcall-pow-codegen.ll    |   19 +-
 llvm/test/CodeGen/AMDGPU/bf16.ll              |   82 +-
 .../AMDGPU/copysign-simplify-demanded-bits.ll |    3 +-
 .../CodeGen/AMDGPU/dagcombine-fmul-sel.ll     |  133 +-
 .../CodeGen/AMDGPU/extract_vector_dynelt.ll   |   18 +-
 llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll     |  256 ++-
 llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll    |    6 +-
 llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll |  271 +--
 .../CodeGen/AMDGPU/fneg-modifier-casting.ll   |   41 +-
 llvm/test/CodeGen/AMDGPU/fptrunc.ll           |  284 ++-
 llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll         |   75 +-
 llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll         |   67 +-
 .../CodeGen/AMDGPU/indirect-addressing-si.ll  |   55 +-
 llvm/test/CodeGen/AMDGPU/llvm.log.ll          | 1591 ++++++++---------
 llvm/test/CodeGen/AMDGPU/llvm.log10.ll        | 1591 ++++++++---------
 llvm/test/CodeGen/AMDGPU/llvm.log2.ll         |  557 +++---
 llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll |   86 +-
 .../CodeGen/AMDGPU/private-memory-atomics.ll  |    8 +-
 .../AMDGPU/pseudo-scalar-transcendental.ll    |   28 +-
 llvm/test/CodeGen/AMDGPU/rsq.f64.ll           |  530 +++---
 .../CodeGen/AMDGPU/vector-alloca-bitcast.ll   |   20 +-
 llvm/test/CodeGen/ARM/select-imm.ll           |   18 +-
 .../CodeGen/MSP430/shift-amount-threshold.ll  |    3 +-
 llvm/test/CodeGen/Thumb/branchless-cmp.ll     |  104 +-
 29 files changed, 2781 insertions(+), 3182 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index bc7cdf38dbc2a..f5943c9eb5187 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -28192,7 +28192,8 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
   if ((Fold || Swap) &&
       TLI.getBooleanContents(CmpOpVT) ==
           TargetLowering::ZeroOrOneBooleanContent &&
-      (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
+      (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT)) &&
+      TLI.convertSelectOfConstantsToMath(VT)) {
 
     if (Swap) {
       CC = ISD::getSetCCInverse(CC, CmpOpVT);
diff --git a/llvm/test/CodeGen/AArch64/bfis-in-loop.ll b/llvm/test/CodeGen/AArch64/bfis-in-loop.ll
index 6b12d954b9d1c..43d49da1abd21 100644
--- a/llvm/test/CodeGen/AArch64/bfis-in-loop.ll
+++ b/llvm/test/CodeGen/AArch64/bfis-in-loop.ll
@@ -13,25 +13,26 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 define i64 @bfis_in_loop_zero() {
 ; CHECK-LABEL: bfis_in_loop_zero:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT: 	adrp	x8, :got:global
-; CHECK-NEXT: 	mov	x0, xzr
-; CHECK-NEXT: 	mov	w9, wzr
-; CHECK-NEXT: 	ldr	x8, [x8, :got_lo12:global]
-; CHECK-NEXT: 	ldr	x8, [x8]
-; CHECK-NEXT: .LBB0_1:                                // %midblock
-; CHECK-NEXT:   // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:	ldrh	w10, [x8, #72]
-; CHECK-NEXT:	ldr	x13, [x8, #8]
-; CHECK-NEXT:	lsr	w11, w10, #8
-; CHECK-NEXT:	cmp	w10, #0
-; CHECK-NEXT:	ldr	x8, [x13, #16]
-; CHECK-NEXT:	cset	w12, ne
-; CHECK-NEXT:	csel	w9, w9, w11, eq
-; CHECK-NEXT:	and	x11, x0, #0xffffffff00000000
-; CHECK-NEXT:	bfi	w10, w9, #8, #24
-; CHECK-NEXT:	orr	x11, x11, x12, lsl #16
-; CHECK-NEXT:	orr	x0, x11, x10
-; CHECK-NEXT:	cbnz	x13, .LBB0_1
+; CHECK-NEXT:    adrp x9, :got:global
+; CHECK-NEXT:    mov x0, xzr
+; CHECK-NEXT:    mov w8, wzr
+; CHECK-NEXT:    ldr x9, [x9, :got_lo12:global]
+; CHECK-NEXT:    mov w10, #65536 // =0x10000
+; CHECK-NEXT:    ldr x9, [x9]
+; CHECK-NEXT:  .LBB0_1: // %midblock
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ldrh w11, [x9, #72]
+; CHECK-NEXT:    and x13, x0, #0xffffffff00000000
+; CHECK-NEXT:    lsr w12, w11, #8
+; CHECK-NEXT:    cmp w11, #0
+; CHECK-NEXT:    csel w8, w8, w12, eq
+; CHECK-NEXT:    ldr x12, [x9, #8]
+; CHECK-NEXT:    csel x9, xzr, x10, eq
+; CHECK-NEXT:    bfi w11, w8, #8, #24
+; CHECK-NEXT:    orr x13, x9, x13
+; CHECK-NEXT:    ldr x9, [x12, #16]
+; CHECK-NEXT:    orr x0, x13, x11
+; CHECK-NEXT:    cbnz x12, .LBB0_1
 ; CHECK-NEXT:  // %bb.2: // %exit
 ; CHECK-NEXT:    ret
 entry:
@@ -80,25 +81,26 @@ exit:
 define i64 @bfis_in_loop_undef() {
 ; CHECK-LABEL: bfis_in_loop_undef:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT: 	adrp	x9, :got:global
-; CHECK-NEXT: 	mov	w8, wzr
-; CHECK-NEXT:                                         // implicit-def: $x0
-; CHECK-NEXT: 	ldr	x9, [x9, :got_lo12:global]
-; CHECK-NEXT: 	ldr	x9, [x9]
-; CHECK-NEXT: .LBB1_1:                                // %midblock
-; CHECK-NEXT:                                         // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:	ldrh	w10, [x9, #72]
-; CHECK-NEXT:	ldr	x13, [x9, #8]
-; CHECK-NEXT:	lsr	w11, w10, #8
-; CHECK-NEXT:	cmp	w10, #0
-; CHECK-NEXT:	ldr	x9, [x13, #16]
-; CHECK-NEXT:	cset	w12, ne
-; CHECK-NEXT:	csel	w8, w8, w11, eq
-; CHECK-NEXT:	and	x11, x0, #0xffffffff00000000
-; CHECK-NEXT:	bfi	w10, w8, #8, #24
-; CHECK-NEXT:	orr	x11, x11, x12, lsl #16
-; CHECK-NEXT:	orr	x0, x11, x10
-; CHECK-NEXT:	cbnz	x13, .LBB1_1
+; CHECK-NEXT:    adrp x9, :got:global
+; CHECK-NEXT:    mov w8, wzr
+; CHECK-NEXT:    // implicit-def: $x0
+; CHECK-NEXT:    ldr x9, [x9, :got_lo12:global]
+; CHECK-NEXT:    ldr x10, [x9]
+; CHECK-NEXT:    mov w9, #65536 // =0x10000
+; CHECK-NEXT:  .LBB1_1: // %midblock
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ldrh w11, [x10, #72]
+; CHECK-NEXT:    and x13, x0, #0xffffffff00000000
+; CHECK-NEXT:    lsr w12, w11, #8
+; CHECK-NEXT:    cmp w11, #0
+; CHECK-NEXT:    csel w8, w8, w12, eq
+; CHECK-NEXT:    ldr x12, [x10, #8]
+; CHECK-NEXT:    csel x10, xzr, x9, eq
+; CHECK-NEXT:    bfi w11, w8, #8, #24
+; CHECK-NEXT:    orr x13, x10, x13
+; CHECK-NEXT:    ldr x10, [x12, #16]
+; CHECK-NEXT:    orr x0, x13, x11
+; CHECK-NEXT:    cbnz x12, .LBB1_1
 ; CHECK-NEXT:  // %bb.2: // %exit
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/AArch64/select_cc.ll b/llvm/test/CodeGen/AArch64/select_cc.ll
index 92c8087518151..4eed1578ace63 100644
--- a/llvm/test/CodeGen/AArch64/select_cc.ll
+++ b/llvm/test/CodeGen/AArch64/select_cc.ll
@@ -5,8 +5,8 @@ define i64 @select_ogt_float(float %a, float %b) {
 ; CHECK-LABEL: select_ogt_float:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    fcmp s0, s1
-; CHECK-NEXT:    cset w8, gt
-; CHECK-NEXT:    ubfiz x0, x8, #2, #32
+; CHECK-NEXT:    mov w8, #4 // =0x4
+; CHECK-NEXT:    csel x0, x8, xzr, gt
 ; CHECK-NEXT:    ret
 entry:
   %cc = fcmp ogt float %a, %b
@@ -18,8 +18,8 @@ define i64 @select_ule_float_inverse(float %a, float %b) {
 ; CHECK-LABEL: select_ule_float_inverse:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    fcmp s0, s1
-; CHECK-NEXT:    cset w8, gt
-; CHECK-NEXT:    ubfiz x0, x8, #2, #32
+; CHECK-NEXT:    mov w8, #4 // =0x4
+; CHECK-NEXT:    csel x0, xzr, x8, le
 ; CHECK-NEXT:    ret
 entry:
   %cc = fcmp ule float %a, %b
@@ -30,9 +30,9 @@ entry:
 define i64 @select_eq_i32(i32 %a, i32 %b) {
 ; CHECK-LABEL: select_eq_i32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #4 // =0x4
 ; CHECK-NEXT:    cmp w0, w1
-; CHECK-NEXT:    cset w8, eq
-; CHECK-NEXT:    ubfiz x0, x8, #2, #32
+; CHECK-NEXT:    csel x0, x8, xzr, eq
 ; CHECK-NEXT:    ret
 entry:
   %cc = icmp eq i32 %a, %b
@@ -43,9 +43,9 @@ entry:
 define i64 @select_ne_i32_inverse(i32 %a, i32 %b) {
 ; CHECK-LABEL: select_ne_i32_inverse:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #4 // =0x4
 ; CHECK-NEXT:    cmp w0, w1
-; CHECK-NEXT:    cset w8, eq
-; CHECK-NEXT:    ubfiz x0, x8, #2, #32
+; CHECK-NEXT:    csel x0, xzr, x8, ne
 ; CHECK-NEXT:    ret
 entry:
   %cc = icmp ne i32 %a, %b
diff --git a/llvm/test/CodeGen/AArch64/selectopt-const.ll b/llvm/test/CodeGen/AArch64/selectopt-const.ll
index f10327e136ad1..a44c746e0f281 100644
--- a/llvm/test/CodeGen/AArch64/selectopt-const.ll
+++ b/llvm/test/CodeGen/AArch64/selectopt-const.ll
@@ -13,24 +13,24 @@ define i32 @test_const(ptr %in1, ptr %in2, ptr %out, i32 %n, ptr %tbl) {
 ; CHECK-NEXT:    mov w8, w3
 ; CHECK-NEXT:    movk w9, #16309, lsl #16
 ; CHECK-NEXT:    fmov s0, w9
+; CHECK-NEXT:    mov w9, #16 // =0x10
 ; CHECK-NEXT:    .p2align 5, , 16
 ; CHECK-NEXT:  .LBB0_2: // %for.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ldr s4, [x1], #4
-; CHECK-NEXT:    ldr w9, [x0], #4
-; CHECK-NEXT:    add w9, w9, #10
-; CHECK-NEXT:    scvtf d3, w9
+; CHECK-NEXT:    ldr w10, [x0], #4
+; CHECK-NEXT:    add w10, w10, #10
+; CHECK-NEXT:    scvtf d3, w10
 ; CHECK-NEXT:    fmadd s4, s4, s0, s1
 ; CHECK-NEXT:    fabs s4, s4
 ; CHECK-NEXT:    fcvt d4, s4
 ; CHECK-NEXT:    fdiv d3, d3, d4
 ; CHECK-NEXT:    fcmp d3, d2
-; CHECK-NEXT:    cset w9, lt
+; CHECK-NEXT:    csel x10, x9, xzr, lt
 ; CHECK-NEXT:    subs x8, x8, #1
-; CHECK-NEXT:    ubfiz x9, x9, #4, #32
-; CHECK-NEXT:    ldr s3, [x4, x9]
-; CHECK-NEXT:    fcvtzs w9, s3
-; CHECK-NEXT:    str w9, [x2], #4
+; CHECK-NEXT:    ldr s3, [x4, x10]
+; CHECK-NEXT:    fcvtzs w10, s3
+; CHECK-NEXT:    str w10, [x2], #4
 ; CHECK-NEXT:    b.ne .LBB0_2
 ; CHECK-NEXT:  .LBB0_3: // %for.cond.cleanup
 ; CHECK-NEXT:    mov w0, wzr
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll
index a3f27ebddf9d6..c9a4379a6dfcd 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll
@@ -19,8 +19,8 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 ; GCN-ALLOCA: v_add_{{[iu]}}32_e32 [[RESULT:v[0-9]+]], vcc, v{{[0-9]+}}, v0
 
 ; GCN-PROMOTE: s_cmp_eq_u32 [[IN]], 1
-; GCN-PROMOTE-NEXT: s_cselect_b64 vcc, -1, 0
-; GCN-PROMOTE-NEXT: v_addc_u32_e32 [[RESULT:v[0-9]+]], vcc, 0, v0, vcc
+; GCN-PROMOTE-NEXT: s_cselect_b32 [[SCC:s[0-9]+]], 1, 0
+; GCN-PROMOTE-NEXT: v_add_{{[iu]}}32_e32 [[RESULT:v[0-9]+]], vcc, [[SCC]], v0
 
 ; GCN: buffer_store_dword [[RESULT]]
 define amdgpu_kernel void @work_item_info(ptr addrspace(1) %out, i32 %in) {
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll
index ab2363860af9d..54b4888120e5f 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll
@@ -82,17 +82,16 @@ define float @test_pow_fast_f32__integral_y(float %x, i32 %y.i) {
 ; CHECK-NEXT:    v_cvt_f32_i32_e32 v1, v1
 ; CHECK-NEXT:    s_mov_b32 s4, 0x800000
 ; CHECK-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
-; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 32, vcc
 ; CHECK-NEXT:    v_cvt_i32_f32_e32 v1, v1
-; CHECK-NEXT:    v_lshlrev_b32_e32 v3, 5, v3
 ; CHECK-NEXT:    v_ldexp_f32 v3, |v0|, v3
 ; CHECK-NEXT:    v_log_f32_e32 v3, v3
-; CHECK-NEXT:    v_cvt_f32_i32_e32 v4, v1
 ; CHECK-NEXT:    v_mov_b32_e32 v2, 0x42000000
+; CHECK-NEXT:    v_cvt_f32_i32_e32 v4, v1
 ; CHECK-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; CHECK-NEXT:    v_sub_f32_e32 v2, v3, v2
-; CHECK-NEXT:    v_mul_f32_e32 v3, v2, v4
 ; CHECK-NEXT:    s_mov_b32 s4, 0xc2fc0000
+; CHECK-NEXT:    v_mul_f32_e32 v3, v2, v4
 ; CHECK-NEXT:    v_mov_b32_e32 v5, 0x42800000
 ; CHECK-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v3
 ; CHECK-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
@@ -228,8 +227,7 @@ define float @test_powr_fast_f32(float %x, float %y) {
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    s_mov_b32 s4, 0x800000
 ; CHECK-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CHECK-NEXT:    v_lshlrev_b32_e32 v3, 5, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 32, vcc
 ; CHECK-NEXT:    v_ldexp_f32 v0, v0, v3
 ; CHECK-NEXT:    v_log_f32_e32 v0, v0
 ; CHECK-NEXT:    v_mov_b32_e32 v2, 0x42000000
@@ -368,8 +366,7 @@ define float @test_pown_fast_f32(float %x, i32 %y) {
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    s_mov_b32 s4, 0x800000
 ; CHECK-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
-; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CHECK-NEXT:    v_lshlrev_b32_e32 v3, 5, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 32, vcc
 ; CHECK-NEXT:    v_ldexp_f32 v3, |v0|, v3
 ; CHECK-NEXT:    v_log_f32_e32 v3, v3
 ; CHECK-NEXT:    v_cvt_f32_i32_e32 v4, v1
@@ -511,8 +508,7 @@ define float @test_pown_fast_f32_known_even(float %x, i32 %y.arg) {
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    s_mov_b32 s4, 0x800000
 ; CHECK-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
-; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CHECK-NEXT:    v_lshlrev_b32_e32 v3, 5, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 32, vcc
 ; CHECK-NEXT:    v_ldexp_f32 v0, |v0|, v3
 ; CHECK-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
 ; CHECK-NEXT:    v_log_f32_e32 v0, v0
@@ -651,8 +647,7 @@ define float @test_pown_fast_f32_known_odd(float %x, i32 %y.arg) {
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    s_mov_b32 s4, 0x800000
 ; CHECK-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
-; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CHECK-NEXT:    v_lshlrev_b32_e32 v3, 5, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 32, vcc
 ; CHECK-NEXT:    v_ldexp_f32 v3, |v0|, v3
 ; CHECK-NEXT:    v_or_b32_e32 v1, 1, v1
 ; CHECK-NEXT:    v_log_f32_e32 v3, v3
diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll
index 0382cc72a36ae..ba5e22c7f93ff 100644
--- a/llvm/test/CodeGen/AMDGPU/bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/bf16.ll
@@ -25056,8 +25056,7 @@ define bfloat @v_log_bf16(bfloat %a) {
 ; GCN-NEXT:    v_mov_b32_e32 v1, 0x41b17218
 ; GCN-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
 ; GCN-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GCN-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GCN-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GCN-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GCN-NEXT:    v_ldexp_f32_e32 v0, v0, v2
 ; GCN-NEXT:    v_log_f32_e32 v0, v0
 ; GCN-NEXT:    v_and_b32_e32 v2, 0xfffff000, v0
@@ -25083,8 +25082,7 @@ define bfloat @v_log_bf16(bfloat %a) {
 ; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
 ; GFX7-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX7-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX7-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX7-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; GFX7-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    v_log_f32_e32 v0, v0
 ; GFX7-NEXT:    s_mov_b32 s4, 0x3f317217
@@ -25108,8 +25106,7 @@ define bfloat @v_log_bf16(bfloat %a) {
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX8-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX8-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; GFX8-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX8-NEXT:    v_log_f32_e32 v0, v0
 ; GFX8-NEXT:    s_mov_b32 s4, 0x7f800000
@@ -25142,8 +25139,7 @@ define bfloat @v_log_bf16(bfloat %a) {
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX9-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX9-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; GFX9-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX9-NEXT:    v_log_f32_e32 v0, v0
 ; GFX9-NEXT:    s_mov_b32 s4, 0x3f317217
@@ -25172,8 +25168,7 @@ define bfloat @v_log_bf16(bfloat %a) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX10-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
-; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc_lo
 ; GFX10-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX10-NEXT:    v_log_f32_e32 v0, v0
 ; GFX10-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v0
@@ -25198,30 +25193,28 @@ define bfloat @v_log_bf16(bfloat %a) {
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
-; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc_lo
 ; GFX11-NEXT:    v_ldexp_f32 v0, v0, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_log_f32_e32 v0, v0
 ; GFX11-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX11-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
 ; GFX11-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 0x41b17218, vcc_lo
 ; GFX11-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_sub_f32_e32 v0, v0, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
 ; GFX11-NEXT:    v_bfe_u32 v1, v0, 16, 1
 ; GFX11-NEXT:    v_or_b32_e32 v2, 0x400000, v0
 ; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_add3_u32 v1, v1, v0, 0x7fff
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %op = call bfloat @llvm.log.bf16(bfloat %a)
@@ -25237,8 +25230,7 @@ define bfloat @v_log2_bf16(bfloat %a) {
 ; GCN-NEXT:    v_mov_b32_e32 v1, 0x42000000
 ; GCN-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
 ; GCN-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GCN-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GCN-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GCN-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GCN-NEXT:    v_ldexp_f32_e32 v0, v0, v2
 ; GCN-NEXT:    v_log_f32_e32 v0, v0
 ; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
@@ -25253,8 +25245,7 @@ define bfloat @v_log2_bf16(bfloat %a) {
 ; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
 ; GFX7-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX7-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX7-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX7-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; GFX7-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    v_log_f32_e32 v0, v0
 ; GFX7-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -25269,8 +25260,7 @@ define bfloat @v_log2_bf16(bfloat %a) {
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX8-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX8-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; GFX8-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX8-NEXT:    v_log_f32_e32 v0, v0
 ; GFX8-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -25291,8 +25281,7 @@ define bfloat @v_log2_bf16(bfloat %a) {
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX9-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX9-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GFX9-NEXT:    v_ldexp_f32 v0, v0, v2
 ; GFX9-NEXT:    v_log_f32_e32 v0, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -25312,9 +25301,8 @@ define bfloat @v_log2_bf16(bfloat %a) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX10-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
 ; GFX10-NEXT:    v_ldexp_f32 v0, v0, v2
 ; GFX10-NEXT:    v_log_f32_e32 v0, v0
 ; GFX10-NEXT:    v_sub_f32_e32 v0, v0, v1
@@ -25332,21 +25320,20 @@ define bfloat @v_log2_bf16(bfloat %a) {
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc_lo
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_ldexp_f32 v0, v0, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_log_f32_e32 v0, v0
 ; GFX11-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX11-NEXT:    v_sub_f32_e32 v0, v0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
 ; GFX11-NEXT:    v_bfe_u32 v1, v0, 16, 1
 ; GFX11-NEXT:    v_or_b32_e32 v2, 0x400000, v0
 ; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_add3_u32 v1, v1, v0, 0x7fff
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %op = call bfloat @llvm.log2.bf16(bfloat %a)
@@ -25363,8 +25350,7 @@ define bfloat @v_log10_bf16(bfloat %a) {
 ; GCN-NEXT:    v_mov_b32_e32 v1, 0x411a209b
 ; GCN-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
 ; GCN-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GCN-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GCN-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GCN-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GCN-NEXT:    v_ldexp_f32_e32 v0, v0, v2
 ; GCN-NEXT:    v_log_f32_e32 v0, v0
 ; GCN-NEXT:    v_and_b32_e32 v2, 0xfffff000, v0
@@ -25390,8 +25376,7 @@ define bfloat @v_log10_bf16(bfloat %a) {
 ; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
 ; GFX7-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX7-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX7-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX7-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; GFX7-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    v_log_f32_e32 v0, v0
 ; GFX7-NEXT:    s_mov_b32 s4, 0x3e9a209a
@@ -25415,8 +25400,7 @@ define bfloat @v_log10_bf16(bfloat %a) {
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX8-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX8-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; GFX8-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX8-NEXT:    v_log_f32_e32 v0, v0
 ; GFX8-NEXT:    s_mov_b32 s4, 0x7f800000
@@ -25449,8 +25433,7 @@ define bfloat @v_log10_bf16(bfloat %a) {
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX9-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX9-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; GFX9-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX9-NEXT:    v_log_f32_e32 v0, v0
 ; GFX9-NEXT:    s_mov_b32 s4, 0x3e9a209a
@@ -25479,8 +25462,7 @@ define bfloat @v_log10_bf16(bfloat %a) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX10-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
-; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc_lo
 ; GFX10-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX10-NEXT:    v_log_f32_e32 v0, v0
 ; GFX10-NEXT:    v_mul_f32_e32 v1, 0x3e9a209a, v0
@@ -25505,30 +25487,28 @@ define bfloat @v_log10_bf16(bfloat %a) {
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
-; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc_lo
 ; GFX11-NEXT:    v_ldexp_f32 v0, v0, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_log_f32_e32 v0, v0
 ; GFX11-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX11-NEXT:    v_mul_f32_e32 v1, 0x3e9a209a, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
 ; GFX11-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 0x411a209b, vcc_lo
 ; GFX11-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_sub_f32_e32 v0, v0, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
 ; GFX11-NEXT:    v_bfe_u32 v1, v0, 16, 1
 ; GFX11-NEXT:    v_or_b32_e32 v2, 0x400000, v0
 ; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_add3_u32 v1, v1, v0, 0x7fff
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %op = call bfloat @llvm.log10.bf16(bfloat %a)
diff --git a/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll b/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll
index fdc9704a3784e..a01c2fa152ab3 100644
--- a/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll
+++ b/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll
@@ -337,8 +337,7 @@ define float @test_copysign_pow_fast_f32__integral_y(float %x, i32 %y.i) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX9-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
-; GFX9-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 5, v3
+; GFX9-NEXT:    v_cndmask_b32_e64 v3, 0, 32, vcc
 ; GFX9-NEXT:    v_ldexp_f32 v3, |v0|, v3
 ; GFX9-NEXT:    v_log_f32_e32 v3, v3
 ; GFX9-NEXT:    v_cvt_f32_i32_e32 v1, v1
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll
index 935ae48654b64..993f162921663 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll
@@ -1278,84 +1278,44 @@ define double @fmul_select_f64_test11(double %x, i32 %bool.arg1, i32 %bool.arg2)
 }
 
 define double @fmul_select_f64_test12(double %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX7-SDAG-LABEL: fmul_select_f64_test12:
-; GFX7-SDAG:       ; %bb.0:
-; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, v2, v3
-; GFX7-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX7-SDAG-NEXT:    v_lshlrev_b32_e32 v3, 31, v2
-; GFX7-SDAG-NEXT:    v_mov_b32_e32 v2, 0
-; GFX7-SDAG-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
-; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX7-GISEL-LABEL: fmul_select_f64_test12:
-; GFX7-GISEL:       ; %bb.0:
-; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-GISEL-NEXT:    v_bfrev_b32_e32 v5, 1
-; GFX7-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX7-GISEL-NEXT:    v_mov_b32_e32 v4, 0
-; GFX7-GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, 0, vcc
-; GFX7-GISEL-NEXT:    v_mul_f64 v[0:1], v[0:1], v[4:5]
-; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-SDAG-LABEL: fmul_select_f64_test12:
-; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, v2, v3
-; GFX9-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v3, 31, v2
-; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9-SDAG-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: fmul_select_f64_test12:
-; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT:    v_bfrev_b32_e32 v5, 1
-; GFX9-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX9-GISEL-NEXT:    v_mov_b32_e32 v4, 0
-; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, 0, vcc
-; GFX9-GISEL-NEXT:    v_mul_f64 v[0:1], v[0:1], v[4:5]
-; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: fmul_select_f64_test12:
-; GFX10-SDAG:       ; %bb.0:
-; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v3
-; GFX10-SDAG-NEXT:    v_mov_b32_e32 v2, 0
-; GFX10-SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc_lo
-; GFX10-SDAG-NEXT:    v_lshlrev_b32_e32 v3, 31, v3
-; GFX10-SDAG-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
-; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX7-LABEL: fmul_select_f64_test12:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_bfrev_b32_e32 v5, 1
+; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX7-NEXT:    v_mov_b32_e32 v4, 0
+; GFX7-NEXT:    v_cndmask_b32_e64 v5, v5, 0, vcc
+; GFX7-NEXT:    v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10-GISEL-LABEL: fmul_select_f64_test12:
-; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v2, v3
-; GFX10-GISEL-NEXT:    v_mov_b32_e32 v4, 0
-; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v5, 0x80000000, 0, vcc_lo
-; GFX10-GISEL-NEXT:    v_mul_f64 v[0:1], v[0:1], v[4:5]
-; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-LABEL: fmul_select_f64_test12:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_bfrev_b32_e32 v5, 1
+; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-NEXT:    v_mov_b32_e32 v4, 0
+; GFX9-NEXT:    v_cndmask_b32_e64 v5, v5, 0, vcc
+; GFX9-NEXT:    v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SDAG-LABEL: fmul_select_f64_test12:
-; GFX11-SDAG:       ; %bb.0:
-; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v3
-; GFX11-SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc_lo
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 31, v3
-; GFX11-SDAG-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
-; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-LABEL: fmul_select_f64_test12:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX10-NEXT:    v_mov_b32_e32 v4, 0
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0x80000000, 0, vcc_lo
+; GFX10-NEXT:    v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-GISEL-LABEL: fmul_select_f64_test12:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v2, v3
-; GFX11-GISEL-NEXT:    v_mov_b32_e32 v4, 0
-; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v5, 0x80000000, 0, vcc_lo
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_mul_f64 v[0:1], v[0:1], v[4:5]
-; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-LABEL: fmul_select_f64_test12:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX11-NEXT:    v_mov_b32_e32 v4, 0
+; GFX11-NEXT:    v_cndmask_b32_e64 v5, 0x80000000, 0, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %bool = icmp eq i32 %bool.arg1, %bool.arg2
   %y = select i1 %bool, double 0.000000e+00, double -0.000000e+00
   %ldexp = fmul double %x, %y
@@ -3137,11 +3097,11 @@ define bfloat @fmul_select_bf16_test8(bfloat %x, i32 %bool.arg1, i32 %bool.arg2)
 ; GFX7-LABEL: fmul_select_bf16_test8:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
 ; GFX7-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX7-NEXT:    v_bfrev_b32_e32 v3, 1
+; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, 0, v3, vcc
 ; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 31, v1
 ; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v1
 ; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
@@ -3149,10 +3109,10 @@ define bfloat @fmul_select_bf16_test8(bfloat %x, i32 %bool.arg1, i32 %bool.arg2)
 ; GFX9-LABEL: fmul_select_bf16_test8:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff8000
 ; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX9-NEXT:    v_mov_b32_e32 v2, 15
-; GFX9-NEXT:    v_lshlrev_b16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, 0, v3, vcc
+; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX9-NEXT:    v_mul_f32_e32 v0, v0, v1
 ; GFX9-NEXT:    v_bfe_u32 v1, v0, 16, 1
@@ -3169,8 +3129,7 @@ define bfloat @fmul_select_bf16_test8(bfloat %x, i32 %bool.arg1, i32 %bool.arg2)
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
-; GFX10-NEXT:    v_lshlrev_b16 v1, 15, v1
+; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, 0xffff8000, vcc_lo
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 ; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v1
 ; GFX10-NEXT:    v_bfe_u32 v1, v0, 16, 1
@@ -3186,19 +3145,17 @@ define bfloat @fmul_select_bf16_test8(bfloat %x, i32 %bool.arg1, i32 %bool.arg2)
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v1, v2
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 0xffff8000, vcc_lo
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b16 v1, 15, v1
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
 ; GFX11-NEXT:    v_bfe_u32 v1, v0, 16, 1
 ; GFX11-NEXT:    v_or_b32_e32 v2, 0x400000, v0
 ; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_add3_u32 v1, v1, v0, 0x7fff
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %bool = icmp eq i32 %bool.arg1, %bool.arg2
diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll
index 67f2487aed73a..22bf6cd0c8ff2 100644
--- a/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll
@@ -29,20 +29,18 @@ entry:
 define amdgpu_kernel void @int4_extelt(ptr addrspace(1) %out, i32 %sel) {
 ; GCN-LABEL: int4_extelt:
 ; GCN:       ; %bb.0: ; %entry
-; GCN-NEXT:    s_load_dword s6, s[4:5], 0x2c
+; GCN-NEXT:    s_load_dword s2, s[4:5], 0x2c
 ; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_cmp_eq_u32 s6, 1
-; GCN-NEXT:    s_cselect_b64 s[2:3], -1, 0
-; GCN-NEXT:    s_cmp_lg_u32 s6, 2
-; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[2:3]
-; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
-; GCN-NEXT:    s_cmp_lg_u32 s6, 3
-; GCN-NEXT:    v_cndmask_b32_e32 v0, 2, v0, vcc
-; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
-; GCN-NEXT:    v_cndmask_b32_e32 v2, 4, v0, vcc
+; GCN-NEXT:    s_cmp_eq_u32 s2, 1
+; GCN-NEXT:    s_cselect_b32 s3, 1, 0
+; GCN-NEXT:    s_cmp_lg_u32 s2, 2
+; GCN-NEXT:    s_cselect_b32 s3, s3, 2
+; GCN-NEXT:    s_cmp_lg_u32 s2, 3
+; GCN-NEXT:    s_cselect_b32 s2, s3, 4
 ; GCN-NEXT:    v_mov_b32_e32 v0, s0
 ; GCN-NEXT:    v_mov_b32_e32 v1, s1
+; GCN-NEXT:    v_mov_b32_e32 v2, s2
 ; GCN-NEXT:    flat_store_dword v[0:1], v2
 ; GCN-NEXT:    s_endpgm
 entry:
diff --git a/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll b/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll
index a457338873157..fec04a27cda91 100644
--- a/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll
@@ -1529,42 +1529,35 @@ define amdgpu_kernel void @s_copysign_out_f16_mag_f64_sign_f16(ptr addrspace(1)
 ; SI-NEXT:    v_med3_i32 v1, s6, 0, 13
 ; SI-NEXT:    s_or_b32 s4, s2, 0x1000
 ; SI-NEXT:    v_readfirstlane_b32 s6, v1
-; SI-NEXT:    s_lshr_b32 s6, s4, s6
-; SI-NEXT:    v_lshl_b32_e32 v1, s6, v1
-; SI-NEXT:    v_cmp_ne_u32_e32 vcc, s4, v1
-; SI-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-NEXT:    s_add_i32 s8, s5, 0xfffffc10
-; SI-NEXT:    v_readfirstlane_b32 s4, v1
-; SI-NEXT:    s_lshl_b32 s5, s8, 12
-; SI-NEXT:    s_or_b32 s4, s6, s4
-; SI-NEXT:    s_or_b32 s5, s2, s5
-; SI-NEXT:    s_cmp_lt_i32 s8, 1
-; SI-NEXT:    s_cselect_b32 s9, s4, s5
-; SI-NEXT:    s_and_b32 s6, s9, 7
+; SI-NEXT:    s_lshr_b32 s7, s4, s6
+; SI-NEXT:    s_lshl_b32 s6, s7, s6
+; SI-NEXT:    s_cmp_lg_u32 s6, s4
+; SI-NEXT:    s_cselect_b32 s4, 1, 0
+; SI-NEXT:    s_addk_i32 s5, 0xfc10
+; SI-NEXT:    s_lshl_b32 s6, s5, 12
+; SI-NEXT:    s_or_b32 s4, s7, s4
+; SI-NEXT:    s_or_b32 s6, s2, s6
+; SI-NEXT:    s_cmp_lt_i32 s5, 1
+; SI-NEXT:    s_cselect_b32 s4, s4, s6
+; SI-NEXT:    s_and_b32 s6, s4, 7
 ; SI-NEXT:    s_cmp_gt_i32 s6, 5
-; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
+; SI-NEXT:    s_cselect_b32 s7, 1, 0
 ; SI-NEXT:    s_cmp_eq_u32 s6, 3
-; SI-NEXT:    s_cselect_b64 s[6:7], -1, 0
-; SI-NEXT:    s_or_b64 s[4:5], s[6:7], s[4:5]
-; SI-NEXT:    s_lshr_b32 s6, s9, 2
-; SI-NEXT:    s_or_b32 s4, s4, s5
-; SI-NEXT:    s_cmp_lg_u32 s4, 0
-; SI-NEXT:    s_addc_u32 s4, s6, 0
-; SI-NEXT:    s_cmp_lt_i32 s8, 31
-; SI-NEXT:    s_cselect_b32 s6, s4, 0x7c00
+; SI-NEXT:    s_cselect_b32 s6, 1, 0
+; SI-NEXT:    s_or_b32 s6, s6, s7
+; SI-NEXT:    s_lshr_b32 s4, s4, 2
+; SI-NEXT:    s_add_i32 s4, s4, s6
+; SI-NEXT:    s_cmp_lt_i32 s5, 31
+; SI-NEXT:    s_cselect_b32 s4, s4, 0x7c00
 ; SI-NEXT:    s_cmp_lg_u32 s2, 0
-; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
-; SI-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[4:5]
-; SI-NEXT:    v_lshlrev_b32_e32 v1, 9, v1
-; SI-NEXT:    s_cmpk_eq_i32 s8, 0x40f
-; SI-NEXT:    v_or_b32_e32 v1, 0x7c00, v1
-; SI-NEXT:    v_mov_b32_e32 v2, s6
-; SI-NEXT:    s_cselect_b64 vcc, -1, 0
-; SI-NEXT:    s_lshr_b32 s2, s3, 16
-; SI-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
-; SI-NEXT:    s_and_b32 s2, s2, 0x8000
-; SI-NEXT:    v_or_b32_e32 v1, s2, v1
-; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; SI-NEXT:    s_movk_i32 s2, 0x7e00
+; SI-NEXT:    s_cselect_b32 s2, s2, 0x7c00
+; SI-NEXT:    s_cmpk_eq_i32 s5, 0x40f
+; SI-NEXT:    s_cselect_b32 s2, s2, s4
+; SI-NEXT:    s_lshr_b32 s3, s3, 16
+; SI-NEXT:    s_and_b32 s3, s3, 0x8000
+; SI-NEXT:    s_or_b32 s2, s3, s2
+; SI-NEXT:    v_cvt_f32_f16_e32 v1, s2
 ; SI-NEXT:    s_brev_b32 s2, -2
 ; SI-NEXT:    s_mov_b32 s3, 0xf000
 ; SI-NEXT:    v_bfi_b32 v0, s2, v1, v0
@@ -1587,47 +1580,42 @@ define amdgpu_kernel void @s_copysign_out_f16_mag_f64_sign_f16(ptr addrspace(1)
 ; VI-NEXT:    s_cmp_lg_u32 s0, 0
 ; VI-NEXT:    s_cselect_b64 s[0:1], -1, 0
 ; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[0:1]
-; VI-NEXT:    s_bfe_u32 s1, s3, 0xb0014
-; VI-NEXT:    v_readfirstlane_b32 s0, v2
-; VI-NEXT:    s_sub_i32 s2, 0x3f1, s1
-; VI-NEXT:    s_or_b32 s5, s5, s0
-; VI-NEXT:    v_med3_i32 v2, s2, 0, 13
-; VI-NEXT:    s_or_b32 s0, s5, 0x1000
-; VI-NEXT:    v_readfirstlane_b32 s2, v2
-; VI-NEXT:    s_lshr_b32 s2, s0, s2
-; VI-NEXT:    v_lshlrev_b32_e64 v2, v2, s2
-; VI-NEXT:    v_cmp_ne_u32_e32 vcc, s0, v2
-; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-NEXT:    s_add_i32 s6, s1, 0xfffffc10
+; VI-NEXT:    s_bfe_u32 s2, s3, 0xb0014
 ; VI-NEXT:    v_readfirstlane_b32 s0, v2
-; VI-NEXT:    s_lshl_b32 s1, s6, 12
-; VI-NEXT:    s_or_b32 s0, s2, s0
+; VI-NEXT:    s_sub_i32 s3, 0x3f1, s2
+; VI-NEXT:    s_or_b32 s0, s5, s0
+; VI-NEXT:    v_med3_i32 v2, s3, 0, 13
+; VI-NEXT:    s_or_b32 s1, s0, 0x1000
+; VI-NEXT:    v_readfirstlane_b32 s3, v2
+; VI-NEXT:    s_lshr_b32 s5, s1, s3
+; VI-NEXT:    s_lshl_b32 s3, s5, s3
+; VI-NEXT:    s_cmp_lg_u32 s3, s1
+; VI-NEXT:    s_cselect_b32 s1, 1, 0
+; VI-NEXT:    s_addk_i32 s2, 0xfc10
+; VI-NEXT:    s_lshl_b32 s3, s2, 12
 ; VI-NEXT:    s_or_b32 s1, s5, s1
-; VI-NEXT:    s_cmp_lt_i32 s6, 1
-; VI-NEXT:    s_cselect_b32 s7, s0, s1
-; VI-NEXT:    s_and_b32 s2, s7, 7
-; VI-NEXT:    s_cmp_gt_i32 s2, 5
-; VI-NEXT:    s_cselect_b64 s[0:1], -1, 0
-; VI-NEXT:    s_cmp_eq_u32 s2, 3
-; VI-NEXT:    s_cselect_b64 s[2:3], -1, 0
-; VI-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
-; VI-NEXT:    s_lshr_b32 s2, s7, 2
-; VI-NEXT:    s_cmp_lg_u64 s[0:1], 0
-; VI-NEXT:    s_addc_u32 s0, s2, 0
-; VI-NEXT:    s_cmp_lt_i32 s6, 31
-; VI-NEXT:    s_cselect_b32 s2, s0, 0x7c00
-; VI-NEXT:    s_cmp_lg_u32 s5, 0
-; VI-NEXT:    s_cselect_b64 s[0:1], -1, 0
-; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[0:1]
-; VI-NEXT:    v_lshlrev_b32_e32 v2, 9, v2
-; VI-NEXT:    s_cmpk_eq_i32 s6, 0x40f
-; VI-NEXT:    v_or_b32_e32 v2, 0x7c00, v2
-; VI-NEXT:    v_mov_b32_e32 v3, s2
-; VI-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
-; VI-NEXT:    s_movk_i32 s0, 0x7fff
+; VI-NEXT:    s_or_b32 s3, s0, s3
+; VI-NEXT:    s_cmp_lt_i32 s2, 1
+; VI-NEXT:    s_cselect_b32 s1, s1, s3
+; VI-NEXT:    s_and_b32 s3, s1, 7
+; VI-NEXT:    s_cmp_gt_i32 s3, 5
+; VI-NEXT:    s_cselect_b32 s5, 1, 0
+; VI-NEXT:    s_cmp_eq_u32 s3, 3
+; VI-NEXT:    s_cselect_b32 s3, 1, 0
+; VI-NEXT:    s_or_b32 s3, s3, s5
+; VI-NEXT:    s_lshr_b32 s1, s1, 2
+; VI-NEXT:    s_add_i32 s1, s1, s3
+; VI-NEXT:    s_cmp_lt_i32 s2, 31
+; VI-NEXT:    s_cselect_b32 s1, s1, 0x7c00
+; VI-NEXT:    s_cmp_lg_u32 s0, 0
+; VI-NEXT:    s_movk_i32 s0, 0x7e00
+; VI-NEXT:    s_cselect_b32 s0, s0, 0x7c00
+; VI-NEXT:    s_cmpk_eq_i32 s2, 0x40f
+; VI-NEXT:    s_cselect_b32 s0, s0, s1
+; VI-NEXT:    s_movk_i32 s1, 0x7fff
+; VI-NEXT:    v_mov_b32_e32 v2, s0
 ; VI-NEXT:    v_mov_b32_e32 v3, s4
-; VI-NEXT:    v_bfi_b32 v2, s0, v2, v3
+; VI-NEXT:    v_bfi_b32 v2, s1, v2, v3
 ; VI-NEXT:    flat_store_short v[0:1], v2
 ; VI-NEXT:    s_endpgm
 ;
@@ -1646,45 +1634,40 @@ define amdgpu_kernel void @s_copysign_out_f16_mag_f64_sign_f16(ptr addrspace(1)
 ; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[4:5]
 ; GFX9-NEXT:    s_bfe_u32 s3, s3, 0xb0014
 ; GFX9-NEXT:    v_readfirstlane_b32 s2, v1
-; GFX9-NEXT:    s_sub_i32 s4, 0x3f1, s3
-; GFX9-NEXT:    s_or_b32 s7, s7, s2
-; GFX9-NEXT:    v_med3_i32 v1, s4, 0, 13
-; GFX9-NEXT:    s_or_b32 s2, s7, 0x1000
-; GFX9-NEXT:    v_readfirstlane_b32 s4, v1
-; GFX9-NEXT:    s_lshr_b32 s4, s2, s4
-; GFX9-NEXT:    v_lshlrev_b32_e64 v1, v1, s4
-; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, s2, v1
-; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX9-NEXT:    s_add_i32 s8, s3, 0xfffffc10
-; GFX9-NEXT:    v_readfirstlane_b32 s2, v1
-; GFX9-NEXT:    s_lshl_b32 s3, s8, 12
-; GFX9-NEXT:    s_or_b32 s2, s4, s2
-; GFX9-NEXT:    s_or_b32 s3, s7, s3
-; GFX9-NEXT:    s_cmp_lt_i32 s8, 1
-; GFX9-NEXT:    s_cselect_b32 s9, s2, s3
-; GFX9-NEXT:    s_and_b32 s4, s9, 7
-; GFX9-NEXT:    s_cmp_gt_i32 s4, 5
-; GFX9-NEXT:    s_cselect_b64 s[2:3], -1, 0
-; GFX9-NEXT:    s_cmp_eq_u32 s4, 3
-; GFX9-NEXT:    s_cselect_b64 s[4:5], -1, 0
-; GFX9-NEXT:    s_or_b64 s[2:3], s[4:5], s[2:3]
-; GFX9-NEXT:    s_lshr_b32 s4, s9, 2
-; GFX9-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9-NEXT:    s_addc_u32 s2, s4, 0
-; GFX9-NEXT:    s_cmp_lt_i32 s8, 31
-; GFX9-NEXT:    s_cselect_b32 s4, s2, 0x7c00
-; GFX9-NEXT:    s_cmp_lg_u32 s7, 0
-; GFX9-NEXT:    s_cselect_b64 s[2:3], -1, 0
-; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[2:3]
-; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 9, v1
-; GFX9-NEXT:    s_cmpk_eq_i32 s8, 0x40f
-; GFX9-NEXT:    v_or_b32_e32 v1, 0x7c00, v1
-; GFX9-NEXT:    v_mov_b32_e32 v2, s4
-; GFX9-NEXT:    s_cselect_b64 vcc, -1, 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
-; GFX9-NEXT:    s_movk_i32 s2, 0x7fff
+; GFX9-NEXT:    s_sub_i32 s5, 0x3f1, s3
+; GFX9-NEXT:    s_or_b32 s2, s7, s2
+; GFX9-NEXT:    v_med3_i32 v1, s5, 0, 13
+; GFX9-NEXT:    s_or_b32 s4, s2, 0x1000
+; GFX9-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX9-NEXT:    s_lshr_b32 s7, s4, s5
+; GFX9-NEXT:    s_lshl_b32 s5, s7, s5
+; GFX9-NEXT:    s_cmp_lg_u32 s5, s4
+; GFX9-NEXT:    s_cselect_b32 s4, 1, 0
+; GFX9-NEXT:    s_addk_i32 s3, 0xfc10
+; GFX9-NEXT:    s_lshl_b32 s5, s3, 12
+; GFX9-NEXT:    s_or_b32 s4, s7, s4
+; GFX9-NEXT:    s_or_b32 s5, s2, s5
+; GFX9-NEXT:    s_cmp_lt_i32 s3, 1
+; GFX9-NEXT:    s_cselect_b32 s4, s4, s5
+; GFX9-NEXT:    s_and_b32 s5, s4, 7
+; GFX9-NEXT:    s_cmp_gt_i32 s5, 5
+; GFX9-NEXT:    s_cselect_b32 s7, 1, 0
+; GFX9-NEXT:    s_cmp_eq_u32 s5, 3
+; GFX9-NEXT:    s_cselect_b32 s5, 1, 0
+; GFX9-NEXT:    s_or_b32 s5, s5, s7
+; GFX9-NEXT:    s_lshr_b32 s4, s4, 2
+; GFX9-NEXT:    s_add_i32 s4, s4, s5
+; GFX9-NEXT:    s_cmp_lt_i32 s3, 31
+; GFX9-NEXT:    s_cselect_b32 s4, s4, 0x7c00
+; GFX9-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX9-NEXT:    s_movk_i32 s2, 0x7e00
+; GFX9-NEXT:    s_cselect_b32 s2, s2, 0x7c00
+; GFX9-NEXT:    s_cmpk_eq_i32 s3, 0x40f
+; GFX9-NEXT:    s_cselect_b32 s2, s2, s4
+; GFX9-NEXT:    s_movk_i32 s3, 0x7fff
+; GFX9-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s6
-; GFX9-NEXT:    v_bfi_b32 v1, s2, v1, v2
+; GFX9-NEXT:    v_bfi_b32 v1, s3, v1, v2
 ; GFX9-NEXT:    global_store_short v0, v1, s[0:1]
 ; GFX9-NEXT:    s_endpgm
 ;
@@ -1704,51 +1687,48 @@ define amdgpu_kernel void @s_copysign_out_f16_mag_f64_sign_f16(ptr addrspace(1)
 ; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s2
 ; GFX11-NEXT:    s_bfe_u32 s2, s3, 0xb0014
 ; GFX11-NEXT:    s_sub_i32 s3, 0x3f1, s2
-; GFX11-NEXT:    s_addk_i32 s2, 0xfc10
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_med3_i32 v1, s3, 0, 13
 ; GFX11-NEXT:    v_readfirstlane_b32 s3, v0
-; GFX11-NEXT:    s_lshl_b32 s7, s2, 12
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s4
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
 ; GFX11-NEXT:    v_readfirstlane_b32 s6, v1
+; GFX11-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX11-NEXT:    s_or_b32 s3, s5, s3
-; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX11-NEXT:    s_or_b32 s5, s3, 0x1000
-; GFX11-NEXT:    s_or_b32 s7, s3, s7
-; GFX11-NEXT:    s_lshr_b32 s6, s5, s6
-; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_lshlrev_b32_e64 v0, v1, s6
-; GFX11-NEXT:    v_mov_b32_e32 v1, 0
-; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, s5, v0
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_readfirstlane_b32 s5, v0
-; GFX11-NEXT:    s_or_b32 s5, s6, s5
-; GFX11-NEXT:    s_cmp_lt_i32 s2, 1
-; GFX11-NEXT:    s_cselect_b32 s5, s5, s7
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT:    s_lshr_b32 s7, s5, s6
 ; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT:    s_lshl_b32 s6, s7, s6
+; GFX11-NEXT:    s_cmp_lg_u32 s6, s5
+; GFX11-NEXT:    s_cselect_b32 s5, 1, 0
+; GFX11-NEXT:    s_addk_i32 s2, 0xfc10
+; GFX11-NEXT:    s_or_b32 s5, s7, s5
+; GFX11-NEXT:    s_lshl_b32 s6, s2, 12
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT:    s_or_b32 s6, s3, s6
+; GFX11-NEXT:    s_cmp_lt_i32 s2, 1
+; GFX11-NEXT:    s_cselect_b32 s5, s5, s6
 ; GFX11-NEXT:    s_and_b32 s6, s5, 7
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX11-NEXT:    s_cmp_gt_i32 s6, 5
-; GFX11-NEXT:    s_cselect_b32 s7, -1, 0
+; GFX11-NEXT:    s_cselect_b32 s7, 1, 0
 ; GFX11-NEXT:    s_cmp_eq_u32 s6, 3
-; GFX11-NEXT:    s_cselect_b32 s6, -1, 0
+; GFX11-NEXT:    s_cselect_b32 s6, 1, 0
 ; GFX11-NEXT:    s_lshr_b32 s5, s5, 2
 ; GFX11-NEXT:    s_or_b32 s6, s6, s7
 ; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT:    s_cmp_lg_u32 s6, 0
-; GFX11-NEXT:    s_addc_u32 s5, s5, 0
+; GFX11-NEXT:    s_add_i32 s5, s5, s6
 ; GFX11-NEXT:    s_cmp_lt_i32 s2, 31
+; GFX11-NEXT:    s_movk_i32 s6, 0x7e00
 ; GFX11-NEXT:    s_cselect_b32 s5, s5, 0x7c00
 ; GFX11-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX11-NEXT:    s_cselect_b32 s3, -1, 0
+; GFX11-NEXT:    s_cselect_b32 s3, s6, 0x7c00
 ; GFX11-NEXT:    s_cmpk_eq_i32 s2, 0x40f
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s3
-; GFX11-NEXT:    s_cselect_b32 vcc_lo, -1, 0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
-; GFX11-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, s5, v0, vcc_lo
-; GFX11-NEXT:    v_bfi_b32 v0, 0x7fff, v0, s4
+; GFX11-NEXT:    s_cselect_b32 s2, s3, s5
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT:    v_bfi_b32 v0, 0x7fff, s2, v0
 ; GFX11-NEXT:    global_store_b16 v1, v0, s[0:1]
 ; GFX11-NEXT:    s_endpgm
   %mag.trunc = fptrunc double %mag to half
diff --git a/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll b/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll
index a324ba35b155f..3983655285e57 100644
--- a/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll
@@ -1499,8 +1499,7 @@ define float @v_recip_sqrt_f32_ulp25(float %x) {
 ; CODEGEN-IEEE-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CODEGEN-IEEE-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; CODEGEN-IEEE-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; CODEGEN-IEEE-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; CODEGEN-IEEE-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; CODEGEN-IEEE-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; CODEGEN-IEEE-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; CODEGEN-IEEE-SDAG-NEXT:    v_sqrt_f32_e32 v0, v0
 ; CODEGEN-IEEE-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -16, vcc
@@ -1535,8 +1534,7 @@ define float @v_recip_sqrt_f32_ulp25(float %x) {
 ; IR-IEEE-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; IR-IEEE-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; IR-IEEE-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; IR-IEEE-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; IR-IEEE-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; IR-IEEE-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; IR-IEEE-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; IR-IEEE-SDAG-NEXT:    v_sqrt_f32_e32 v0, v0
 ; IR-IEEE-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -16, vcc
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
index b32630a97b3ad..f4c5ebd8b3cf5 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
@@ -3000,18 +3000,19 @@ define half @v_fneg_fp_round_f64_to_f16(double %a) #0 {
 ; SI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; SI-NEXT:    v_and_b32_e32 v4, 7, v2
 ; SI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v4
-; SI-NEXT:    v_cmp_eq_u32_e64 s[4:5], 3, v4
+; SI-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v4
+; SI-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; SI-NEXT:    v_or_b32_e32 v4, v4, v5
 ; SI-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
-; SI-NEXT:    s_or_b64 vcc, s[4:5], vcc
-; SI-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; SI-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
 ; SI-NEXT:    v_mov_b32_e32 v4, 0x7c00
 ; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v3
 ; SI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; SI-NEXT:    v_mov_b32_e32 v5, 0x7e00
 ; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SI-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
 ; SI-NEXT:    s_movk_i32 s4, 0x40f
-; SI-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
+; SI-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
 ; SI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v3
 ; SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
 ; SI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
@@ -3049,18 +3050,19 @@ define half @v_fneg_fp_round_f64_to_f16(double %a) #0 {
 ; VI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; VI-NEXT:    v_and_b32_e32 v4, 7, v2
 ; VI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v4
-; VI-NEXT:    v_cmp_eq_u32_e64 s[4:5], 3, v4
+; VI-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v4
+; VI-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; VI-NEXT:    v_or_b32_e32 v4, v4, v5
 ; VI-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
-; VI-NEXT:    s_or_b64 vcc, s[4:5], vcc
-; VI-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; VI-NEXT:    v_add_u32_e32 v2, vcc, v2, v4
 ; VI-NEXT:    v_mov_b32_e32 v4, 0x7c00
 ; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v3
 ; VI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; VI-NEXT:    v_mov_b32_e32 v5, 0x7e00
 ; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; VI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
 ; VI-NEXT:    s_movk_i32 s4, 0x40f
-; VI-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
+; VI-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
 ; VI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v3
 ; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; VI-NEXT:    v_mov_b32_e32 v2, 0x8000
@@ -3085,8 +3087,7 @@ define half @v_fneg_fp_round_f64_to_f16(double %a) #0 {
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_med3_i32 v2, v4, 0, 13
 ; GFX11-NEXT:    v_or_b32_e32 v4, 0x1000, v0
-; GFX11-NEXT:    v_cmp_ne_u32_e64 s1, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v5, v2, v4
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v2, v2, v5
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
@@ -3094,21 +3095,23 @@ define half @v_fneg_fp_round_f64_to_f16(double %a) #0 {
 ; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
 ; GFX11-NEXT:    v_or_b32_e32 v2, v5, v2
 ; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xfffffc10, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_lshl_or_b32 v4, v3, 12, v0
 ; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v3
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s1
 ; GFX11-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_lshl_or_b32 v0, v0, 9, 0x7c00
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_and_b32_e32 v4, 7, v2
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v4
-; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 3, v4
-; GFX11-NEXT:    s_or_b32 vcc_lo, s0, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, 0, v2, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc_lo
+; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v4
+; GFX11-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc_lo
+; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_or_b32_e32 v4, v4, v5
+; GFX11-NEXT:    v_dual_mov_b32 v5, 0x7e00 :: v_dual_add_nc_u32 v2, v2, v4
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7c00, v5, vcc_lo
 ; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v3
 ; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7c00, v2, vcc_lo
 ; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0x40f, v3
@@ -3149,18 +3152,19 @@ define half @v_fneg_fp_round_fneg_f64_to_f16(double %a) #0 {
 ; SI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; SI-NEXT:    v_and_b32_e32 v4, 7, v2
 ; SI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v4
-; SI-NEXT:    v_cmp_eq_u32_e64 s[4:5], 3, v4
+; SI-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v4
+; SI-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; SI-NEXT:    v_or_b32_e32 v4, v4, v5
 ; SI-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
-; SI-NEXT:    s_or_b64 vcc, s[4:5], vcc
-; SI-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; SI-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
 ; SI-NEXT:    v_mov_b32_e32 v4, 0x7c00
 ; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v3
 ; SI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; SI-NEXT:    v_mov_b32_e32 v5, 0x7e00
 ; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SI-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
 ; SI-NEXT:    s_movk_i32 s4, 0x40f
-; SI-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
+; SI-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
 ; SI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v3
 ; SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
 ; SI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
@@ -3196,18 +3200,19 @@ define half @v_fneg_fp_round_fneg_f64_to_f16(double %a) #0 {
 ; VI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; VI-NEXT:    v_and_b32_e32 v4, 7, v2
 ; VI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v4
-; VI-NEXT:    v_cmp_eq_u32_e64 s[4:5], 3, v4
+; VI-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v4
+; VI-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; VI-NEXT:    v_or_b32_e32 v4, v4, v5
 ; VI-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
-; VI-NEXT:    s_or_b64 vcc, s[4:5], vcc
-; VI-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; VI-NEXT:    v_add_u32_e32 v2, vcc, v2, v4
 ; VI-NEXT:    v_mov_b32_e32 v4, 0x7c00
 ; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v3
 ; VI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; VI-NEXT:    v_mov_b32_e32 v5, 0x7e00
 ; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; VI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
 ; VI-NEXT:    s_movk_i32 s4, 0x40f
-; VI-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
+; VI-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
 ; VI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v3
 ; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; VI-NEXT:    v_mov_b32_e32 v2, 0x8000
@@ -3229,9 +3234,8 @@ define half @v_fneg_fp_round_fneg_f64_to_f16(double %a) #0 {
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
 ; GFX11-NEXT:    v_and_or_b32 v0, 0xffe, v2, v0
 ; GFX11-NEXT:    v_med3_i32 v2, v4, 0, 13
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_or_b32_e32 v4, 0x1000, v0
-; GFX11-NEXT:    v_cmp_ne_u32_e64 s1, 0, v0
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v5, v2, v4
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v2, v2, v5
@@ -3242,20 +3246,22 @@ define half @v_fneg_fp_round_fneg_f64_to_f16(double %a) #0 {
 ; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xfffffc10, v3
 ; GFX11-NEXT:    v_lshl_or_b32 v4, v3, 12, v0
 ; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v3
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc_lo
-; GFX11-NEXT:    v_lshl_or_b32 v0, v0, 9, 0x7c00
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_and_b32_e32 v4, 7, v2
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v4
-; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 3, v4
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    s_or_b32 vcc_lo, s0, vcc_lo
-; GFX11-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, 0, v2, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc_lo
+; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v4
+; GFX11-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc_lo
+; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT:    v_or_b32_e32 v4, v4, v5
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_mov_b32 v5, 0x7e00 :: v_dual_add_nc_u32 v2, v2, v4
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7c00, v5, vcc_lo
 ; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7c00, v2, vcc_lo
 ; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0x40f, v3
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc_lo
@@ -3298,18 +3304,19 @@ define { half, double } @v_fneg_fp_round_store_use_fneg_f64_to_f16(double %a) #0
 ; SI-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc
 ; SI-NEXT:    v_and_b32_e32 v5, 7, v2
 ; SI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v5
-; SI-NEXT:    v_cmp_eq_u32_e64 s[4:5], 3, v5
+; SI-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v5
+; SI-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; SI-NEXT:    v_or_b32_e32 v5, v5, v6
 ; SI-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
-; SI-NEXT:    s_or_b64 vcc, s[4:5], vcc
-; SI-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; SI-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
 ; SI-NEXT:    v_mov_b32_e32 v5, 0x7c00
 ; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v4
 ; SI-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc
+; SI-NEXT:    v_mov_b32_e32 v6, 0x7e00
 ; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SI-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
 ; SI-NEXT:    s_movk_i32 s4, 0x40f
-; SI-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
+; SI-NEXT:    v_cndmask_b32_e32 v0, v5, v6, vcc
 ; SI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v4
 ; SI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; SI-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
@@ -3349,18 +3356,19 @@ define { half, double } @v_fneg_fp_round_store_use_fneg_f64_to_f16(double %a) #0
 ; VI-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
 ; VI-NEXT:    v_and_b32_e32 v6, 7, v4
 ; VI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v6
-; VI-NEXT:    v_cmp_eq_u32_e64 s[4:5], 3, v6
+; VI-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v6
+; VI-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; VI-NEXT:    v_or_b32_e32 v6, v6, v7
 ; VI-NEXT:    v_lshrrev_b32_e32 v4, 2, v4
-; VI-NEXT:    s_or_b64 vcc, s[4:5], vcc
-; VI-NEXT:    v_addc_u32_e32 v4, vcc, 0, v4, vcc
+; VI-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
 ; VI-NEXT:    v_mov_b32_e32 v6, 0x7c00
 ; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v5
 ; VI-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
+; VI-NEXT:    v_mov_b32_e32 v7, 0x7e00
 ; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; VI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
 ; VI-NEXT:    s_movk_i32 s4, 0x40f
-; VI-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
+; VI-NEXT:    v_cndmask_b32_e32 v0, v6, v7, vcc
 ; VI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v5
 ; VI-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
 ; VI-NEXT:    v_mov_b32_e32 v4, 0x8000
@@ -3383,9 +3391,8 @@ define { half, double } @v_fneg_fp_round_store_use_fneg_f64_to_f16(double %a) #0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
 ; GFX11-NEXT:    v_and_or_b32 v2, 0xffe, v3, v2
 ; GFX11-NEXT:    v_med3_i32 v3, v5, 0, 13
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_or_b32_e32 v5, 0x1000, v2
-; GFX11-NEXT:    v_cmp_ne_u32_e64 s1, 0, v2
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v6, v3, v5
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v3, v3, v6
@@ -3396,27 +3403,29 @@ define { half, double } @v_fneg_fp_round_store_use_fneg_f64_to_f16(double %a) #0
 ; GFX11-NEXT:    v_add_nc_u32_e32 v4, 0xfffffc10, v4
 ; GFX11-NEXT:    v_lshl_or_b32 v5, v4, 12, v2
 ; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v4
-; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc_lo
-; GFX11-NEXT:    v_lshl_or_b32 v2, v2, 9, 0x7c00
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_and_b32_e32 v5, 7, v3
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 2, v3
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v5
-; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 3, v5
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    s_or_b32 vcc_lo, s0, vcc_lo
-; GFX11-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc_lo
+; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v5
+; GFX11-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc_lo
+; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v2
+; GFX11-NEXT:    v_or_b32_e32 v5, v5, v6
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_mov_b32 v6, 0x7e00 :: v_dual_add_nc_u32 v3, v3, v5
+; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7c00, v6, vcc_lo
 ; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v4
+; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v3, 0x7c00, v3, vcc_lo
 ; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0x40f, v4
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_and_or_b32 v3, 0x8000, v5, v2
 ; GFX11-NEXT:    v_xor_b32_e32 v2, 0x80000000, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GFX11-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, v3
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %fneg.a = fneg double %a
@@ -3456,18 +3465,19 @@ define { half, double } @v_fneg_fp_round_multi_use_fneg_f64_to_f16(double %a, do
 ; SI-NEXT:    v_cndmask_b32_e32 v5, v7, v5, vcc
 ; SI-NEXT:    v_and_b32_e32 v7, 7, v5
 ; SI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v7
-; SI-NEXT:    v_cmp_eq_u32_e64 s[4:5], 3, v7
+; SI-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v7
+; SI-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; SI-NEXT:    v_or_b32_e32 v7, v7, v8
 ; SI-NEXT:    v_lshrrev_b32_e32 v5, 2, v5
-; SI-NEXT:    s_or_b64 vcc, s[4:5], vcc
-; SI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; SI-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
 ; SI-NEXT:    v_mov_b32_e32 v7, 0x7c00
 ; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v6
 ; SI-NEXT:    v_cndmask_b32_e32 v5, v7, v5, vcc
+; SI-NEXT:    v_mov_b32_e32 v8, 0x7e00
 ; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
-; SI-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; SI-NEXT:    v_lshlrev_b32_e32 v4, 9, v4
 ; SI-NEXT:    s_movk_i32 s4, 0x40f
-; SI-NEXT:    v_or_b32_e32 v4, 0x7c00, v4
+; SI-NEXT:    v_cndmask_b32_e32 v4, v7, v8, vcc
 ; SI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v6
 ; SI-NEXT:    v_cndmask_b32_e32 v4, v5, v4, vcc
 ; SI-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
@@ -3506,19 +3516,20 @@ define { half, double } @v_fneg_fp_round_multi_use_fneg_f64_to_f16(double %a, do
 ; VI-NEXT:    v_cndmask_b32_e32 v5, v7, v5, vcc
 ; VI-NEXT:    v_and_b32_e32 v7, 7, v5
 ; VI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v7
-; VI-NEXT:    v_cmp_eq_u32_e64 s[4:5], 3, v7
-; VI-NEXT:    v_lshrrev_b32_e32 v5, 2, v5
-; VI-NEXT:    s_or_b64 vcc, s[4:5], vcc
-; VI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; VI-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v7
+; VI-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
 ; VI-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
+; VI-NEXT:    v_or_b32_e32 v7, v7, v8
+; VI-NEXT:    v_lshrrev_b32_e32 v5, 2, v5
+; VI-NEXT:    v_add_u32_e32 v5, vcc, v5, v7
 ; VI-NEXT:    v_mov_b32_e32 v7, 0x7c00
 ; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v6
 ; VI-NEXT:    v_cndmask_b32_e32 v5, v7, v5, vcc
+; VI-NEXT:    v_mov_b32_e32 v8, 0x7e00
 ; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
-; VI-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; VI-NEXT:    v_lshlrev_b32_e32 v4, 9, v4
 ; VI-NEXT:    s_movk_i32 s4, 0x40f
-; VI-NEXT:    v_or_b32_e32 v4, 0x7c00, v4
+; VI-NEXT:    v_cndmask_b32_e32 v4, v7, v8, vcc
 ; VI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v6
 ; VI-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
 ; VI-NEXT:    v_mov_b32_e32 v4, 0x8000
@@ -3537,42 +3548,43 @@ define { half, double } @v_fneg_fp_round_multi_use_fneg_f64_to_f16(double %a, do
 ; GFX11-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
 ; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v4
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_sub_nc_u32_e32 v7, 0x3f1, v6
-; GFX11-NEXT:    v_add_nc_u32_e32 v6, 0xfffffc10, v6
 ; GFX11-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc_lo
 ; GFX11-NEXT:    v_and_or_b32 v4, 0xffe, v5, v4
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_med3_i32 v5, v7, 0, 13
 ; GFX11-NEXT:    v_or_b32_e32 v7, 0x1000, v4
-; GFX11-NEXT:    v_cmp_ne_u32_e64 s1, 0, v4
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v8, v5, v7
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v5, v5, v8
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v5, v7
-; GFX11-NEXT:    v_lshl_or_b32 v7, v6, 12, v4
-; GFX11-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s1
 ; GFX11-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc_lo
-; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v6
-; GFX11-NEXT:    v_lshl_or_b32 v4, v4, 9, 0x7c00
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_or_b32_e32 v5, v8, v5
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v7, v5, vcc_lo
+; GFX11-NEXT:    v_add_nc_u32_e32 v6, 0xfffffc10, v6
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_and_b32_e32 v5, 7, v0
-; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v5
-; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 3, v5
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    s_or_b32 vcc_lo, s0, vcc_lo
-; GFX11-NEXT:    v_add_co_ci_u32_e32 v0, vcc_lo, 0, v0, vcc_lo
+; GFX11-NEXT:    v_lshl_or_b32 v7, v6, 12, v4
+; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v6
+; GFX11-NEXT:    v_cndmask_b32_e32 v5, v7, v5, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_and_b32_e32 v0, 7, v5
+; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 2, v5
+; GFX11-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v0
+; GFX11-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc_lo
+; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
+; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v4
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_or_b32_e32 v0, v0, v7
+; GFX11-NEXT:    v_dual_mov_b32 v7, 0x7e00 :: v_dual_add_nc_u32 v0, v5, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_cndmask_b32_e32 v4, 0x7c00, v7, vcc_lo
 ; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v6
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7c00, v0, vcc_lo
 ; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0x40f, v6
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-NEXT:    v_and_or_b32 v0, 0x8000, v1, v0
 ; GFX11-NEXT:    v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v3
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
@@ -3613,18 +3625,19 @@ define { half, half } @v_fneg_multi_use_fp_round_fneg_f64_to_f16(double %a) #0 {
 ; SI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; SI-NEXT:    v_and_b32_e32 v4, 7, v2
 ; SI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v4
-; SI-NEXT:    v_cmp_eq_u32_e64 s[4:5], 3, v4
+; SI-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v4
+; SI-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; SI-NEXT:    v_or_b32_e32 v4, v4, v5
 ; SI-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
-; SI-NEXT:    s_or_b64 vcc, s[4:5], vcc
-; SI-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; SI-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
 ; SI-NEXT:    v_mov_b32_e32 v4, 0x7c00
 ; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v3
 ; SI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; SI-NEXT:    v_mov_b32_e32 v5, 0x7e00
 ; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SI-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
 ; SI-NEXT:    s_movk_i32 s4, 0x40f
-; SI-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
+; SI-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
 ; SI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v3
 ; SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
 ; SI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
@@ -3662,18 +3675,19 @@ define { half, half } @v_fneg_multi_use_fp_round_fneg_f64_to_f16(double %a) #0 {
 ; VI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; VI-NEXT:    v_and_b32_e32 v4, 7, v2
 ; VI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v4
-; VI-NEXT:    v_cmp_eq_u32_e64 s[4:5], 3, v4
+; VI-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v4
+; VI-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; VI-NEXT:    v_or_b32_e32 v4, v4, v5
 ; VI-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
-; VI-NEXT:    s_or_b64 vcc, s[4:5], vcc
-; VI-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; VI-NEXT:    v_add_u32_e32 v2, vcc, v2, v4
 ; VI-NEXT:    v_mov_b32_e32 v4, 0x7c00
 ; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v3
 ; VI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; VI-NEXT:    v_mov_b32_e32 v5, 0x7e00
 ; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; VI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; VI-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
 ; VI-NEXT:    s_movk_i32 s4, 0x40f
-; VI-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
+; VI-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
 ; VI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v3
 ; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; VI-NEXT:    v_mov_b32_e32 v2, 0x8000
@@ -3696,9 +3710,8 @@ define { half, half } @v_fneg_multi_use_fp_round_fneg_f64_to_f16(double %a) #0 {
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
 ; GFX11-NEXT:    v_and_or_b32 v0, 0xffe, v2, v0
 ; GFX11-NEXT:    v_med3_i32 v2, v4, 0, 13
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_or_b32_e32 v4, 0x1000, v0
-; GFX11-NEXT:    v_cmp_ne_u32_e64 s1, 0, v0
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v5, v2, v4
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v2, v2, v5
@@ -3709,20 +3722,22 @@ define { half, half } @v_fneg_multi_use_fp_round_fneg_f64_to_f16(double %a) #0 {
 ; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xfffffc10, v3
 ; GFX11-NEXT:    v_lshl_or_b32 v4, v3, 12, v0
 ; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v3
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc_lo
-; GFX11-NEXT:    v_lshl_or_b32 v0, v0, 9, 0x7c00
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_and_b32_e32 v4, 7, v2
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v4
-; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 3, v4
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    s_or_b32 vcc_lo, s0, vcc_lo
-; GFX11-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, 0, v2, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc_lo
+; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v4
+; GFX11-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc_lo
+; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NEXT:    v_or_b32_e32 v4, v4, v5
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_mov_b32 v5, 0x7e00 :: v_dual_add_nc_u32 v2, v2, v4
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7c00, v5, vcc_lo
 ; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7c00, v2, vcc_lo
 ; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0x40f, v3
 ; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc_lo
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll b/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
index b2d30b751ae2c..e1791daa3aa0c 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
@@ -828,9 +828,9 @@ define double @cospiD_pattern0(i32 %arg, double %arg1, double %arg2) {
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
 ; GCN-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc
 ; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v4, vcc
+; GCN-NEXT:    v_bfrev_b32_e32 v2, 1
 ; GCN-NEXT:    v_cmp_lt_i32_e32 vcc, 1, v0
-; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GCN-NEXT:    v_lshlrev_b32_e32 v0, 31, v0
+; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GCN-NEXT:    v_xor_b32_e32 v1, v1, v0
 ; GCN-NEXT:    v_mov_b32_e32 v0, v3
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
@@ -839,14 +839,14 @@ define double @cospiD_pattern0(i32 %arg, double %arg1, double %arg2) {
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_and_b32_e32 v5, 1, v0
-; GFX11-NEXT:    v_cmp_lt_i32_e64 s0, 1, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v5
-; GFX11-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s0
-; GFX11-NEXT:    v_dual_cndmask_b32 v0, v1, v3 :: v_dual_cndmask_b32 v1, v2, v4
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 31, v5
-; GFX11-NEXT:    v_xor_b32_e32 v1, v1, v2
+; GFX11-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, v2, v4, vcc_lo
+; GFX11-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 1, v0
+; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 0x80000000, vcc_lo
+; GFX11-NEXT:    v_xor_b32_e32 v1, v1, v0
+; GFX11-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %i = and i32 %arg, 1
   %i3 = icmp eq i32 %i, 0
@@ -907,12 +907,13 @@ define float @cospiD_pattern0_half(i16 %arg, float %arg1, float %arg2) {
 ; GFX7-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GFX7-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
+; GFX7-NEXT:    v_mov_b32_e32 v2, 0xffff8000
 ; GFX7-NEXT:    v_cmp_lt_i32_e32 vcc, 1, v3
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
 ; GFX7-NEXT:    v_and_b32_e32 v1, 0xffff, v0
-; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 31, v2
-; GFX7-NEXT:    v_xor_b32_e32 v0, v2, v0
+; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX7-NEXT:    v_xor_b32_e32 v0, v0, v2
+; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -922,9 +923,9 @@ define float @cospiD_pattern0_half(i16 %arg, float %arg1, float %arg2) {
 ; GFX9-NEXT:    v_and_b32_e32 v3, 1, v0
 ; GFX9-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff8000
 ; GFX9-NEXT:    v_cmp_lt_i16_e32 vcc, 1, v0
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 15, v0
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT:    v_xor_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; GFX9-NEXT:    s_mov_b32 s4, 0x5040100
 ; GFX9-NEXT:    v_perm_b32 v0, v0, v1, s4
@@ -934,16 +935,14 @@ define float @cospiD_pattern0_half(i16 %arg, float %arg1, float %arg2) {
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_and_b32_e32 v3, 1, v0
-; GFX11-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 1, v0
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v3
-; GFX11-NEXT:    v_lshlrev_b16 v0, 15, v0
 ; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 1, v0
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 0xffff8000, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_xor_b32_e32 v0, v2, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-NEXT:    v_perm_b32 v0, v0, v1, 0x5040100
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %i = and i16 %arg, 1
diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.ll
index 051a0c51b0867..5e15846fb76fc 100644
--- a/llvm/test/CodeGen/AMDGPU/fptrunc.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptrunc.ll
@@ -95,121 +95,110 @@ define amdgpu_kernel void @fptrunc_f64_to_f32(ptr addrspace(1) %out, double %in)
 define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in) {
 ; SI-LABEL: fptrunc_f64_to_f16:
 ; SI:       ; %bb.0:
-; SI-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x9
-; SI-NEXT:    s_mov_b32 s3, 0xf000
-; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_movk_i32 s6, 0x7e00
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    s_mov_b32 s0, s4
-; SI-NEXT:    s_mov_b32 s1, s5
-; SI-NEXT:    s_lshr_b32 s4, s7, 8
-; SI-NEXT:    s_and_b32 s5, s7, 0x1ff
+; SI-NEXT:    s_lshr_b32 s4, s3, 8
+; SI-NEXT:    s_and_b32 s5, s3, 0x1ff
 ; SI-NEXT:    s_and_b32 s8, s4, 0xffe
-; SI-NEXT:    s_or_b32 s4, s5, s6
-; SI-NEXT:    s_cmp_lg_u32 s4, 0
+; SI-NEXT:    s_or_b32 s2, s5, s2
+; SI-NEXT:    s_cmp_lg_u32 s2, 0
 ; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
 ; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
-; SI-NEXT:    s_bfe_u32 s4, s7, 0xb0014
-; SI-NEXT:    v_readfirstlane_b32 s5, v0
-; SI-NEXT:    s_sub_i32 s6, 0x3f1, s4
-; SI-NEXT:    s_add_i32 s10, s4, 0xfffffc10
-; SI-NEXT:    s_or_b32 s11, s8, s5
-; SI-NEXT:    v_med3_i32 v0, s6, 0, 13
-; SI-NEXT:    s_lshl_b32 s4, s10, 12
-; SI-NEXT:    s_or_b32 s5, s11, 0x1000
-; SI-NEXT:    v_readfirstlane_b32 s6, v0
-; SI-NEXT:    s_or_b32 s4, s11, s4
-; SI-NEXT:    s_lshr_b32 s6, s5, s6
-; SI-NEXT:    v_lshl_b32_e32 v0, s6, v0
-; SI-NEXT:    v_cmp_ne_u32_e32 vcc, s5, v0
-; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SI-NEXT:    v_readfirstlane_b32 s5, v0
-; SI-NEXT:    s_or_b32 s5, s6, s5
-; SI-NEXT:    s_cmp_lt_i32 s10, 1
-; SI-NEXT:    s_cselect_b32 s6, s5, s4
-; SI-NEXT:    s_and_b32 s8, s6, 7
+; SI-NEXT:    s_bfe_u32 s2, s3, 0xb0014
+; SI-NEXT:    v_readfirstlane_b32 s4, v0
+; SI-NEXT:    s_sub_i32 s5, 0x3f1, s2
+; SI-NEXT:    s_or_b32 s4, s8, s4
+; SI-NEXT:    v_med3_i32 v0, s5, 0, 13
+; SI-NEXT:    s_or_b32 s5, s4, 0x1000
+; SI-NEXT:    v_readfirstlane_b32 s8, v0
+; SI-NEXT:    s_lshr_b32 s9, s5, s8
+; SI-NEXT:    s_lshl_b32 s8, s9, s8
+; SI-NEXT:    s_cmp_lg_u32 s8, s5
+; SI-NEXT:    s_cselect_b32 s5, 1, 0
+; SI-NEXT:    s_addk_i32 s2, 0xfc10
+; SI-NEXT:    s_or_b32 s5, s9, s5
+; SI-NEXT:    s_lshl_b32 s8, s2, 12
+; SI-NEXT:    s_or_b32 s8, s4, s8
+; SI-NEXT:    s_cmp_lt_i32 s2, 1
+; SI-NEXT:    s_cselect_b32 s5, s5, s8
+; SI-NEXT:    s_and_b32 s8, s5, 7
 ; SI-NEXT:    s_cmp_gt_i32 s8, 5
-; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
+; SI-NEXT:    s_cselect_b32 s9, 1, 0
 ; SI-NEXT:    s_cmp_eq_u32 s8, 3
-; SI-NEXT:    s_cselect_b64 s[8:9], -1, 0
-; SI-NEXT:    s_lshr_b32 s6, s6, 2
-; SI-NEXT:    s_or_b64 s[4:5], s[8:9], s[4:5]
-; SI-NEXT:    s_or_b32 s4, s4, s5
+; SI-NEXT:    s_cselect_b32 s8, 1, 0
+; SI-NEXT:    s_lshr_b32 s5, s5, 2
+; SI-NEXT:    s_or_b32 s8, s8, s9
+; SI-NEXT:    s_add_i32 s5, s5, s8
+; SI-NEXT:    s_cmp_lt_i32 s2, 31
+; SI-NEXT:    s_cselect_b32 s5, s5, 0x7c00
 ; SI-NEXT:    s_cmp_lg_u32 s4, 0
-; SI-NEXT:    s_addc_u32 s4, s6, 0
-; SI-NEXT:    s_cmp_lt_i32 s10, 31
-; SI-NEXT:    s_cselect_b32 s6, s4, 0x7c00
-; SI-NEXT:    s_cmp_lg_u32 s11, 0
-; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
-; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
-; SI-NEXT:    s_cmpk_eq_i32 s10, 0x40f
-; SI-NEXT:    v_mov_b32_e32 v1, s6
-; SI-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
-; SI-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
-; SI-NEXT:    s_cselect_b64 vcc, -1, 0
-; SI-NEXT:    s_lshr_b32 s4, s7, 16
-; SI-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
-; SI-NEXT:    s_and_b32 s4, s4, 0x8000
-; SI-NEXT:    v_or_b32_e32 v0, s4, v0
-; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
+; SI-NEXT:    s_cselect_b32 s4, s6, 0x7c00
+; SI-NEXT:    s_cmpk_eq_i32 s2, 0x40f
+; SI-NEXT:    s_cselect_b32 s2, s4, s5
+; SI-NEXT:    s_lshr_b32 s3, s3, 16
+; SI-NEXT:    s_and_b32 s3, s3, 0x8000
+; SI-NEXT:    s_or_b32 s2, s3, s2
+; SI-NEXT:    s_mov_b32 s6, -1
+; SI-NEXT:    s_mov_b32 s4, s0
+; SI-NEXT:    s_mov_b32 s5, s1
+; SI-NEXT:    v_mov_b32_e32 v0, s2
+; SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-SAFE-SDAG-LABEL: fptrunc_f64_to_f16:
 ; VI-SAFE-SDAG:       ; %bb.0:
-; VI-SAFE-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; VI-SAFE-SDAG-NEXT:    s_mov_b32 s7, 0xf000
-; VI-SAFE-SDAG-NEXT:    s_mov_b32 s6, -1
+; VI-SAFE-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x24
+; VI-SAFE-SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; VI-SAFE-SDAG-NEXT:    s_mov_b32 s2, -1
 ; VI-SAFE-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-SAFE-SDAG-NEXT:    s_mov_b32 s4, s0
-; VI-SAFE-SDAG-NEXT:    s_lshr_b32 s0, s3, 8
-; VI-SAFE-SDAG-NEXT:    s_and_b32 s8, s0, 0xffe
-; VI-SAFE-SDAG-NEXT:    s_and_b32 s0, s3, 0x1ff
-; VI-SAFE-SDAG-NEXT:    s_or_b32 s0, s0, s2
-; VI-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s0, 0
-; VI-SAFE-SDAG-NEXT:    s_mov_b32 s5, s1
-; VI-SAFE-SDAG-NEXT:    s_cselect_b64 s[0:1], -1, 0
-; VI-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
-; VI-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
-; VI-SAFE-SDAG-NEXT:    s_bfe_u32 s1, s3, 0xb0014
-; VI-SAFE-SDAG-NEXT:    s_or_b32 s2, s8, s0
-; VI-SAFE-SDAG-NEXT:    s_sub_i32 s8, 0x3f1, s1
+; VI-SAFE-SDAG-NEXT:    s_mov_b32 s0, s4
+; VI-SAFE-SDAG-NEXT:    s_lshr_b32 s4, s7, 8
+; VI-SAFE-SDAG-NEXT:    s_and_b32 s8, s4, 0xffe
+; VI-SAFE-SDAG-NEXT:    s_and_b32 s4, s7, 0x1ff
+; VI-SAFE-SDAG-NEXT:    s_or_b32 s4, s4, s6
+; VI-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s4, 0
+; VI-SAFE-SDAG-NEXT:    s_mov_b32 s1, s5
+; VI-SAFE-SDAG-NEXT:    s_cselect_b64 s[4:5], -1, 0
+; VI-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; VI-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s4, v0
+; VI-SAFE-SDAG-NEXT:    s_bfe_u32 s6, s7, 0xb0014
+; VI-SAFE-SDAG-NEXT:    s_or_b32 s4, s8, s4
+; VI-SAFE-SDAG-NEXT:    s_sub_i32 s8, 0x3f1, s6
 ; VI-SAFE-SDAG-NEXT:    v_med3_i32 v0, s8, 0, 13
-; VI-SAFE-SDAG-NEXT:    s_or_b32 s0, s2, 0x1000
+; VI-SAFE-SDAG-NEXT:    s_or_b32 s5, s4, 0x1000
 ; VI-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s8, v0
-; VI-SAFE-SDAG-NEXT:    s_lshr_b32 s8, s0, s8
-; VI-SAFE-SDAG-NEXT:    v_lshlrev_b32_e64 v0, v0, s8
-; VI-SAFE-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, s0, v0
-; VI-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; VI-SAFE-SDAG-NEXT:    s_add_i32 s10, s1, 0xfffffc10
-; VI-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
-; VI-SAFE-SDAG-NEXT:    s_lshl_b32 s1, s10, 12
-; VI-SAFE-SDAG-NEXT:    s_or_b32 s0, s8, s0
-; VI-SAFE-SDAG-NEXT:    s_or_b32 s1, s2, s1
-; VI-SAFE-SDAG-NEXT:    s_cmp_lt_i32 s10, 1
-; VI-SAFE-SDAG-NEXT:    s_cselect_b32 s11, s0, s1
-; VI-SAFE-SDAG-NEXT:    s_and_b32 s8, s11, 7
+; VI-SAFE-SDAG-NEXT:    s_lshr_b32 s9, s5, s8
+; VI-SAFE-SDAG-NEXT:    s_lshl_b32 s8, s9, s8
+; VI-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s8, s5
+; VI-SAFE-SDAG-NEXT:    s_cselect_b32 s5, 1, 0
+; VI-SAFE-SDAG-NEXT:    s_addk_i32 s6, 0xfc10
+; VI-SAFE-SDAG-NEXT:    s_lshl_b32 s8, s6, 12
+; VI-SAFE-SDAG-NEXT:    s_or_b32 s5, s9, s5
+; VI-SAFE-SDAG-NEXT:    s_or_b32 s8, s4, s8
+; VI-SAFE-SDAG-NEXT:    s_cmp_lt_i32 s6, 1
+; VI-SAFE-SDAG-NEXT:    s_cselect_b32 s5, s5, s8
+; VI-SAFE-SDAG-NEXT:    s_and_b32 s8, s5, 7
 ; VI-SAFE-SDAG-NEXT:    s_cmp_gt_i32 s8, 5
-; VI-SAFE-SDAG-NEXT:    s_cselect_b64 s[0:1], -1, 0
+; VI-SAFE-SDAG-NEXT:    s_cselect_b32 s9, 1, 0
 ; VI-SAFE-SDAG-NEXT:    s_cmp_eq_u32 s8, 3
-; VI-SAFE-SDAG-NEXT:    s_cselect_b64 s[8:9], -1, 0
-; VI-SAFE-SDAG-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
-; VI-SAFE-SDAG-NEXT:    s_lshr_b32 s8, s11, 2
-; VI-SAFE-SDAG-NEXT:    s_cmp_lg_u64 s[0:1], 0
-; VI-SAFE-SDAG-NEXT:    s_addc_u32 s0, s8, 0
-; VI-SAFE-SDAG-NEXT:    s_cmp_lt_i32 s10, 31
-; VI-SAFE-SDAG-NEXT:    s_cselect_b32 s8, s0, 0x7c00
-; VI-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s2, 0
-; VI-SAFE-SDAG-NEXT:    s_cselect_b64 s[0:1], -1, 0
-; VI-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
-; VI-SAFE-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
-; VI-SAFE-SDAG-NEXT:    s_cmpk_eq_i32 s10, 0x40f
-; VI-SAFE-SDAG-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
-; VI-SAFE-SDAG-NEXT:    v_mov_b32_e32 v1, s8
-; VI-SAFE-SDAG-NEXT:    s_cselect_b64 vcc, -1, 0
-; VI-SAFE-SDAG-NEXT:    s_lshr_b32 s0, s3, 16
-; VI-SAFE-SDAG-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
-; VI-SAFE-SDAG-NEXT:    s_and_b32 s0, s0, 0x8000
-; VI-SAFE-SDAG-NEXT:    v_or_b32_e32 v0, s0, v0
-; VI-SAFE-SDAG-NEXT:    buffer_store_short v0, off, s[4:7], 0
+; VI-SAFE-SDAG-NEXT:    s_cselect_b32 s8, 1, 0
+; VI-SAFE-SDAG-NEXT:    s_or_b32 s8, s8, s9
+; VI-SAFE-SDAG-NEXT:    s_lshr_b32 s5, s5, 2
+; VI-SAFE-SDAG-NEXT:    s_add_i32 s5, s5, s8
+; VI-SAFE-SDAG-NEXT:    s_cmp_lt_i32 s6, 31
+; VI-SAFE-SDAG-NEXT:    s_cselect_b32 s5, s5, 0x7c00
+; VI-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s4, 0
+; VI-SAFE-SDAG-NEXT:    s_movk_i32 s4, 0x7e00
+; VI-SAFE-SDAG-NEXT:    s_cselect_b32 s4, s4, 0x7c00
+; VI-SAFE-SDAG-NEXT:    s_cmpk_eq_i32 s6, 0x40f
+; VI-SAFE-SDAG-NEXT:    s_cselect_b32 s4, s4, s5
+; VI-SAFE-SDAG-NEXT:    s_lshr_b32 s5, s7, 16
+; VI-SAFE-SDAG-NEXT:    s_and_b32 s5, s5, 0x8000
+; VI-SAFE-SDAG-NEXT:    s_or_b32 s4, s5, s4
+; VI-SAFE-SDAG-NEXT:    v_mov_b32_e32 v0, s4
+; VI-SAFE-SDAG-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; VI-SAFE-SDAG-NEXT:    s_endpgm
 ;
 ; VI-SAFE-GISEL-LABEL: fptrunc_f64_to_f16:
@@ -299,45 +288,41 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
 ; GFX10-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s2
 ; GFX10-SAFE-SDAG-NEXT:    s_bfe_u32 s2, s3, 0xb0014
 ; GFX10-SAFE-SDAG-NEXT:    s_sub_i32 s5, 0x3f1, s2
-; GFX10-SAFE-SDAG-NEXT:    s_addk_i32 s2, 0xfc10
 ; GFX10-SAFE-SDAG-NEXT:    v_med3_i32 v1, s5, 0, 13
 ; GFX10-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s5, v0
-; GFX10-SAFE-SDAG-NEXT:    s_lshl_b32 s7, s2, 12
 ; GFX10-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s6, v1
 ; GFX10-SAFE-SDAG-NEXT:    s_or_b32 s4, s4, s5
 ; GFX10-SAFE-SDAG-NEXT:    s_or_b32 s5, s4, 0x1000
-; GFX10-SAFE-SDAG-NEXT:    s_or_b32 s7, s4, s7
-; GFX10-SAFE-SDAG-NEXT:    s_lshr_b32 s6, s5, s6
-; GFX10-SAFE-SDAG-NEXT:    v_lshlrev_b32_e64 v0, v1, s6
-; GFX10-SAFE-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc_lo, s5, v0
-; GFX10-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX10-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s5, v0
-; GFX10-SAFE-SDAG-NEXT:    s_or_b32 s5, s6, s5
+; GFX10-SAFE-SDAG-NEXT:    s_lshr_b32 s7, s5, s6
+; GFX10-SAFE-SDAG-NEXT:    s_lshl_b32 s6, s7, s6
+; GFX10-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s6, s5
+; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 s5, 1, 0
+; GFX10-SAFE-SDAG-NEXT:    s_addk_i32 s2, 0xfc10
+; GFX10-SAFE-SDAG-NEXT:    s_or_b32 s5, s7, s5
+; GFX10-SAFE-SDAG-NEXT:    s_lshl_b32 s6, s2, 12
+; GFX10-SAFE-SDAG-NEXT:    s_or_b32 s6, s4, s6
 ; GFX10-SAFE-SDAG-NEXT:    s_cmp_lt_i32 s2, 1
-; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 s5, s5, s7
+; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 s5, s5, s6
 ; GFX10-SAFE-SDAG-NEXT:    s_and_b32 s6, s5, 7
 ; GFX10-SAFE-SDAG-NEXT:    s_cmp_gt_i32 s6, 5
-; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 s7, -1, 0
+; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 s7, 1, 0
 ; GFX10-SAFE-SDAG-NEXT:    s_cmp_eq_u32 s6, 3
-; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 s6, -1, 0
+; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 s6, 1, 0
 ; GFX10-SAFE-SDAG-NEXT:    s_lshr_b32 s5, s5, 2
 ; GFX10-SAFE-SDAG-NEXT:    s_or_b32 s6, s6, s7
-; GFX10-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s6, 0
-; GFX10-SAFE-SDAG-NEXT:    s_addc_u32 s5, s5, 0
+; GFX10-SAFE-SDAG-NEXT:    s_add_i32 s5, s5, s6
 ; GFX10-SAFE-SDAG-NEXT:    s_cmp_lt_i32 s2, 31
+; GFX10-SAFE-SDAG-NEXT:    s_movk_i32 s6, 0x7e00
 ; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 s5, s5, 0x7c00
 ; GFX10-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s4, 0
-; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 s4, -1, 0
+; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 s4, s6, 0x7c00
 ; GFX10-SAFE-SDAG-NEXT:    s_cmpk_eq_i32 s2, 0x40f
-; GFX10-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
-; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 vcc_lo, -1, 0
-; GFX10-SAFE-SDAG-NEXT:    s_lshr_b32 s2, s3, 16
+; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 s2, s4, s5
+; GFX10-SAFE-SDAG-NEXT:    s_lshr_b32 s3, s3, 16
+; GFX10-SAFE-SDAG-NEXT:    s_and_b32 s3, s3, 0x8000
+; GFX10-SAFE-SDAG-NEXT:    s_or_b32 s2, s3, s2
 ; GFX10-SAFE-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
-; GFX10-SAFE-SDAG-NEXT:    s_and_b32 s2, s2, 0x8000
-; GFX10-SAFE-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
-; GFX10-SAFE-SDAG-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
-; GFX10-SAFE-SDAG-NEXT:    v_cndmask_b32_e32 v0, s5, v0, vcc_lo
-; GFX10-SAFE-SDAG-NEXT:    v_or_b32_e32 v0, s2, v0
+; GFX10-SAFE-SDAG-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX10-SAFE-SDAG-NEXT:    s_mov_b32 s2, -1
 ; GFX10-SAFE-SDAG-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; GFX10-SAFE-SDAG-NEXT:    s_endpgm
@@ -430,53 +415,50 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
 ; GFX11-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s2
 ; GFX11-SAFE-SDAG-NEXT:    s_bfe_u32 s2, s3, 0xb0014
 ; GFX11-SAFE-SDAG-NEXT:    s_sub_i32 s5, 0x3f1, s2
-; GFX11-SAFE-SDAG-NEXT:    s_addk_i32 s2, 0xfc10
+; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-SAFE-SDAG-NEXT:    v_med3_i32 v1, s5, 0, 13
 ; GFX11-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s5, v0
-; GFX11-SAFE-SDAG-NEXT:    s_lshl_b32 s7, s2, 12
 ; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s6, v1
 ; GFX11-SAFE-SDAG-NEXT:    s_or_b32 s4, s4, s5
-; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX11-SAFE-SDAG-NEXT:    s_or_b32 s5, s4, 0x1000
-; GFX11-SAFE-SDAG-NEXT:    s_or_b32 s7, s4, s7
-; GFX11-SAFE-SDAG-NEXT:    s_lshr_b32 s6, s5, s6
-; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-SDAG-NEXT:    v_lshlrev_b32_e64 v0, v1, s6
-; GFX11-SAFE-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc_lo, s5, v0
-; GFX11-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s5, v0
-; GFX11-SAFE-SDAG-NEXT:    s_or_b32 s5, s6, s5
-; GFX11-SAFE-SDAG-NEXT:    s_cmp_lt_i32 s2, 1
-; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 s5, s5, s7
+; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX11-SAFE-SDAG-NEXT:    s_lshr_b32 s7, s5, s6
 ; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SAFE-SDAG-NEXT:    s_lshl_b32 s6, s7, s6
+; GFX11-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s6, s5
+; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 s5, 1, 0
+; GFX11-SAFE-SDAG-NEXT:    s_addk_i32 s2, 0xfc10
+; GFX11-SAFE-SDAG-NEXT:    s_or_b32 s5, s7, s5
+; GFX11-SAFE-SDAG-NEXT:    s_lshl_b32 s6, s2, 12
+; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX11-SAFE-SDAG-NEXT:    s_or_b32 s6, s4, s6
+; GFX11-SAFE-SDAG-NEXT:    s_cmp_lt_i32 s2, 1
+; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 s5, s5, s6
 ; GFX11-SAFE-SDAG-NEXT:    s_and_b32 s6, s5, 7
+; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX11-SAFE-SDAG-NEXT:    s_cmp_gt_i32 s6, 5
-; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 s7, -1, 0
+; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 s7, 1, 0
 ; GFX11-SAFE-SDAG-NEXT:    s_cmp_eq_u32 s6, 3
-; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 s6, -1, 0
+; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 s6, 1, 0
 ; GFX11-SAFE-SDAG-NEXT:    s_lshr_b32 s5, s5, 2
 ; GFX11-SAFE-SDAG-NEXT:    s_or_b32 s6, s6, s7
 ; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s6, 0
-; GFX11-SAFE-SDAG-NEXT:    s_addc_u32 s5, s5, 0
+; GFX11-SAFE-SDAG-NEXT:    s_add_i32 s5, s5, s6
 ; GFX11-SAFE-SDAG-NEXT:    s_cmp_lt_i32 s2, 31
+; GFX11-SAFE-SDAG-NEXT:    s_movk_i32 s6, 0x7e00
 ; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 s5, s5, 0x7c00
 ; GFX11-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s4, 0
-; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 s4, -1, 0
+; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 s4, s6, 0x7c00
 ; GFX11-SAFE-SDAG-NEXT:    s_cmpk_eq_i32 s2, 0x40f
-; GFX11-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
-; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 vcc_lo, -1, 0
-; GFX11-SAFE-SDAG-NEXT:    s_lshr_b32 s2, s3, 16
+; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 s2, s4, s5
+; GFX11-SAFE-SDAG-NEXT:    s_lshr_b32 s3, s3, 16
+; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SAFE-SDAG-NEXT:    s_and_b32 s3, s3, 0x8000
+; GFX11-SAFE-SDAG-NEXT:    s_or_b32 s2, s3, s2
 ; GFX11-SAFE-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
-; GFX11-SAFE-SDAG-NEXT:    s_and_b32 s2, s2, 0x8000
-; GFX11-SAFE-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
-; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-SDAG-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
-; GFX11-SAFE-SDAG-NEXT:    v_cndmask_b32_e32 v0, s5, v0, vcc_lo
-; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-SDAG-NEXT:    v_or_b32_e32 v0, s2, v0
+; GFX11-SAFE-SDAG-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX11-SAFE-SDAG-NEXT:    s_mov_b32 s2, -1
 ; GFX11-SAFE-SDAG-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
 ; GFX11-SAFE-SDAG-NEXT:    s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll b/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll
index c1d5b5857b6b5..87c7cce854b11 100644
--- a/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll
@@ -1837,8 +1837,7 @@ define float @v_sqrt_f32_ulp2(float %x) {
 ; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
 ; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
 ; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, vcc
@@ -1874,8 +1873,7 @@ define float @v_sqrt_f32_ulp25(float %x) {
 ; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
 ; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
 ; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, vcc
@@ -1911,8 +1909,7 @@ define float @v_sqrt_f32_ulp3(float %x) {
 ; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
 ; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
 ; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, vcc
@@ -1947,8 +1944,7 @@ define float @v_sqrt_f32_ulp2_fabs(float %x) {
 ; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
 ; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, s4
-; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[4:5]
-; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 32, s[4:5]
 ; SDAG-IEEE-NEXT:    v_ldexp_f32_e64 v0, |v0|, v1
 ; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
 ; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, s[4:5]
@@ -2074,12 +2070,10 @@ define <2 x float> @v_sqrt_v2f32_ulp2(<2 x float> %x) {
 ; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
 ; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e64 s[4:5], s4, v1
 ; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v2
-; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[4:5]
-; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 32, s[4:5]
 ; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
 ; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v1
@@ -2218,12 +2212,10 @@ define <2 x float> @v_sqrt_v2f32_ulp2_fabs(<2 x float> %x) {
 ; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SDAG-IEEE-NEXT:    s_mov_b32 s6, 0x800000
 ; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, s6
-; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[4:5]
-; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 32, s[4:5]
 ; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[6:7], |v1|, s6
 ; SDAG-IEEE-NEXT:    v_ldexp_f32_e64 v0, |v0|, v2
-; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[6:7]
-; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 32, s[6:7]
 ; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
 ; SDAG-IEEE-NEXT:    v_ldexp_f32_e64 v1, |v1|, v2
 ; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v1
@@ -2315,8 +2307,7 @@ define float @v_sqrt_f32_ulp2_noncontractable_rcp(float %x) {
 ; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
 ; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
 ; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, vcc
@@ -2404,8 +2395,7 @@ define float @v_sqrt_f32_ulp2_noncontractable_fdiv(float %x, float %y) {
 ; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
 ; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v2
 ; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
 ; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, -16, vcc
@@ -2489,8 +2479,7 @@ define float @v_sqrt_f32_ulp2_contractable_fdiv(float %x, float %y) {
 ; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
 ; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v2
 ; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
 ; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, -16, vcc
@@ -2574,8 +2563,7 @@ define float @v_sqrt_f32_ulp2_contractable_fdiv_arcp(float %x, float %y) {
 ; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
 ; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v2
 ; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
 ; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, -16, vcc
@@ -2631,12 +2619,10 @@ define <2 x float> @v_sqrt_v2f32_ulp2_noncontractable_rcp(<2 x float> %x) {
 ; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
 ; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e64 s[4:5], s4, v1
 ; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v2
-; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[4:5]
-; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, 32, s[4:5]
 ; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
 ; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v1, v1, v2
 ; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v1
@@ -2763,12 +2749,10 @@ define <2 x float> @v_sqrt_v2f32_ulp2_contractable_fdiv(<2 x float> %x, <2 x flo
 ; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
 ; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v4, 5, v4
+; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v4, 0, 32, vcc
 ; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e64 s[4:5], s4, v1
 ; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v4
-; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s[4:5]
-; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v4, 5, v4
+; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v4, 0, 32, s[4:5]
 ; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
 ; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v1, v1, v4
 ; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v1
@@ -2900,12 +2884,10 @@ define <2 x float> @v_sqrt_v2f32_ulp2_contractable_fdiv_arcp(<2 x float> %x, <2
 ; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
 ; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v4, 5, v4
+; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v4, 0, 32, vcc
 ; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e64 s[4:5], s4, v1
 ; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v4
-; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s[4:5]
-; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v4, 5, v4
+; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v4, 0, 32, s[4:5]
 ; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
 ; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v1, v1, v4
 ; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v1
@@ -3026,8 +3008,7 @@ define float @v_sqrt_f32_known_never_posdenormal_ulp2(float nofpclass(psub) %x)
 ; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
 ; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
 ; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, vcc
@@ -3062,8 +3043,7 @@ define float @v_sqrt_f32_nsz_known_never_posdenormal_ulp2(float nofpclass(psub)
 ; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
 ; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
 ; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, vcc
@@ -3098,8 +3078,7 @@ define float @v_sqrt_f32_known_never_negdenormal(float nofpclass(nsub) %x) {
 ; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
 ; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
 ; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, vcc
@@ -3698,8 +3677,7 @@ define float @v_sqrt_f32_known_never_zero_never_ninf_ulp2(float nofpclass(zero n
 ; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
 ; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
 ; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, vcc
@@ -3734,8 +3712,7 @@ define float @v_sqrt_f32_known_never_ninf_ulp2(float nofpclass(ninf) %x) {
 ; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
 ; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
 ; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, vcc
@@ -3770,8 +3747,7 @@ define float @v_sqrt_f32_nsz_known_never_ninf_ulp2(float nofpclass(ninf) %x) {
 ; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
 ; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v0, v0
 ; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, -16, vcc
@@ -3910,8 +3886,7 @@ define float @v_elim_redun_check_ult_sqrt_ulp3(float %in) {
 ; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0x800000
 ; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SDAG-IEEE-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SDAG-IEEE-NEXT:    v_ldexp_f32_e32 v1, v0, v1
 ; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v1
 ; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, 0, -16, vcc
diff --git a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
index 34ee90c68569f..42f098522b9ad 100644
--- a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
@@ -239,10 +239,10 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64(double inreg %x) {
 ; SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; SDAG-NEXT:    v_bfrev_b32_e32 v1, 8
 ; SDAG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SDAG-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
+; SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; SDAG-NEXT:    s_cselect_b32 s2, 0x100, 0
+; SDAG-NEXT:    v_mov_b32_e32 v0, s2
 ; SDAG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
-; SDAG-NEXT:    s_and_b64 s[0:1], vcc, exec
 ; SDAG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
 ; SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
@@ -308,10 +308,10 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_ninf(double inreg %x) {
 ; SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; SDAG-NEXT:    v_bfrev_b32_e32 v1, 8
 ; SDAG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SDAG-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
+; SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; SDAG-NEXT:    s_cselect_b32 s2, 0x100, 0
+; SDAG-NEXT:    v_mov_b32_e32 v0, s2
 ; SDAG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
-; SDAG-NEXT:    s_and_b64 s[0:1], vcc, exec
 ; SDAG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
 ; SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
@@ -377,10 +377,10 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_afn(double inreg %x) {
 ; SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; SDAG-NEXT:    v_bfrev_b32_e32 v1, 8
 ; SDAG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SDAG-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
+; SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; SDAG-NEXT:    s_cselect_b32 s2, 0x100, 0
+; SDAG-NEXT:    v_mov_b32_e32 v0, s2
 ; SDAG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
-; SDAG-NEXT:    s_and_b64 s[0:1], vcc, exec
 ; SDAG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
 ; SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
@@ -446,10 +446,10 @@ define amdgpu_ps <2 x i32> @s_sqrt_f64_afn_nnan_ninf(double inreg %x) {
 ; SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; SDAG-NEXT:    v_bfrev_b32_e32 v1, 8
 ; SDAG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SDAG-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
+; SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; SDAG-NEXT:    s_cselect_b32 s2, 0x100, 0
+; SDAG-NEXT:    v_mov_b32_e32 v0, s2
 ; SDAG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
-; SDAG-NEXT:    s_and_b64 s[0:1], vcc, exec
 ; SDAG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
 ; SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
@@ -677,11 +677,10 @@ define <2 x double> @v_sqrt_v2f64_afn(<2 x double> %x) {
 ; SDAG-NEXT:    s_brev_b32 s5, 8
 ; SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
 ; SDAG-NEXT:    v_cmp_gt_f64_e64 s[4:5], s[4:5], v[2:3]
-; SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; SDAG-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
-; SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v4
-; SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s[4:5]
-; SDAG-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
+; SDAG-NEXT:    v_mov_b32_e32 v4, 0x100
+; SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v4, vcc
+; SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[4:5]
+; SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v5
 ; SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[0:1]
 ; SDAG-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
@@ -703,8 +702,8 @@ define <2 x double> @v_sqrt_v2f64_afn(<2 x double> %x) {
 ; SDAG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[2:3]
 ; SDAG-NEXT:    v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
 ; SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SDAG-NEXT:    v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
+; SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SDAG-NEXT:    v_cndmask_b32_e32 v10, 0, v8, vcc
 ; SDAG-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[4:5]
 ; SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
@@ -874,11 +873,10 @@ define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) {
 ; SDAG-NEXT:    s_brev_b32 s5, 8
 ; SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
 ; SDAG-NEXT:    v_cmp_gt_f64_e64 s[4:5], s[4:5], v[2:3]
-; SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; SDAG-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
-; SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v4
-; SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s[4:5]
-; SDAG-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
+; SDAG-NEXT:    v_mov_b32_e32 v4, 0x100
+; SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v4, vcc
+; SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[4:5]
+; SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v5
 ; SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[0:1]
 ; SDAG-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
@@ -900,8 +898,8 @@ define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) {
 ; SDAG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[2:3]
 ; SDAG-NEXT:    v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
 ; SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SDAG-NEXT:    v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
+; SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SDAG-NEXT:    v_cndmask_b32_e32 v10, 0, v8, vcc
 ; SDAG-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[4:5]
 ; SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
@@ -1102,11 +1100,10 @@ define <2 x double> @v_sqrt_v2f64(<2 x double> %x) {
 ; SDAG-NEXT:    s_brev_b32 s5, 8
 ; SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
 ; SDAG-NEXT:    v_cmp_gt_f64_e64 s[4:5], s[4:5], v[2:3]
-; SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; SDAG-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
-; SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v4
-; SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s[4:5]
-; SDAG-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
+; SDAG-NEXT:    v_mov_b32_e32 v4, 0x100
+; SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v4, vcc
+; SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[4:5]
+; SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v5
 ; SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[0:1]
 ; SDAG-NEXT:    v_rsq_f64_e32 v[6:7], v[2:3]
@@ -1128,8 +1125,8 @@ define <2 x double> @v_sqrt_v2f64(<2 x double> %x) {
 ; SDAG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[2:3]
 ; SDAG-NEXT:    v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
 ; SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SDAG-NEXT:    v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
+; SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SDAG-NEXT:    v_cndmask_b32_e32 v10, 0, v8, vcc
 ; SDAG-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[4:5]
 ; SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
@@ -1201,16 +1198,14 @@ define <3 x double> @v_sqrt_v3f64(<3 x double> %x) {
 ; SDAG-NEXT:    s_mov_b32 s6, 0
 ; SDAG-NEXT:    s_brev_b32 s7, 8
 ; SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[6:7], v[0:1]
+; SDAG-NEXT:    v_mov_b32_e32 v10, 0x100
 ; SDAG-NEXT:    v_cmp_gt_f64_e64 s[4:5], s[6:7], v[2:3]
 ; SDAG-NEXT:    v_cmp_gt_f64_e64 s[6:7], s[6:7], v[4:5]
-; SDAG-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; SDAG-NEXT:    v_lshlrev_b32_e32 v6, 8, v6
+; SDAG-NEXT:    v_cndmask_b32_e32 v6, 0, v10, vcc
 ; SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
-; SDAG-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
-; SDAG-NEXT:    v_lshlrev_b32_e32 v6, 8, v6
+; SDAG-NEXT:    v_cndmask_b32_e64 v6, 0, v10, s[4:5]
 ; SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v6
-; SDAG-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[6:7]
-; SDAG-NEXT:    v_lshlrev_b32_e32 v10, 8, v10
+; SDAG-NEXT:    v_cndmask_b32_e64 v10, 0, v10, s[6:7]
 ; SDAG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v10
 ; SDAG-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
 ; SDAG-NEXT:    v_rsq_f64_e32 v[8:9], v[2:3]
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
index 74020c43a3ca3..d2a3b7a527780 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
@@ -885,10 +885,8 @@ define amdgpu_kernel void @extract_neg_offset_sgpr(ptr addrspace(1) %out, i32 %o
 ; GENERIC-NEXT:    s_waitcnt lgkmcnt(0)
 ; GENERIC-NEXT:    s_addk_i32 s2, 0xfe00
 ; GENERIC-NEXT:    s_cmp_eq_u32 s2, 1
-; GENERIC-NEXT:    s_cselect_b64 s[4:5], -1, 0
+; GENERIC-NEXT:    s_cselect_b32 s4, 1, 0
 ; GENERIC-NEXT:    s_cmp_lg_u32 s2, 2
-; GENERIC-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
-; GENERIC-NEXT:    v_readfirstlane_b32 s4, v0
 ; GENERIC-NEXT:    s_cselect_b32 s4, s4, 2
 ; GENERIC-NEXT:    s_cmp_lg_u32 s2, 3
 ; GENERIC-NEXT:    s_cselect_b32 s4, s4, 3
@@ -3319,57 +3317,56 @@ define amdgpu_kernel void @insert_neg_offset_sgpr(ptr addrspace(1) %in, ptr addr
 ; GENERIC-NEXT:    s_mov_b32 s2, -1
 ; GENERIC-NEXT:    s_waitcnt lgkmcnt(0)
 ; GENERIC-NEXT:    s_addk_i32 s6, 0xfe00
-; GENERIC-NEXT:    s_cmp_eq_u32 s6, 0
-; GENERIC-NEXT:    s_cselect_b64 s[4:5], -1, 0
 ; GENERIC-NEXT:    s_cmp_eq_u32 s6, 3
-; GENERIC-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GENERIC-NEXT:    s_cselect_b32 s4, 16, 3
 ; GENERIC-NEXT:    s_cmp_eq_u32 s6, 2
-; GENERIC-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
 ; GENERIC-NEXT:    s_cselect_b32 s5, 16, 2
 ; GENERIC-NEXT:    s_cmp_eq_u32 s6, 1
 ; GENERIC-NEXT:    v_mov_b32_e32 v3, s4
 ; GENERIC-NEXT:    s_cselect_b32 s4, 16, 1
-; GENERIC-NEXT:    s_cmp_eq_u32 s6, 7
+; GENERIC-NEXT:    s_cmp_eq_u32 s6, 0
 ; GENERIC-NEXT:    v_mov_b32_e32 v2, s5
-; GENERIC-NEXT:    s_cselect_b32 s5, 16, 7
-; GENERIC-NEXT:    s_cmp_eq_u32 s6, 6
+; GENERIC-NEXT:    s_cselect_b32 s5, 16, 0
+; GENERIC-NEXT:    s_cmp_eq_u32 s6, 7
 ; GENERIC-NEXT:    v_mov_b32_e32 v1, s4
-; GENERIC-NEXT:    s_cselect_b32 s4, 16, 6
+; GENERIC-NEXT:    s_cselect_b32 s4, 16, 7
+; GENERIC-NEXT:    s_cmp_eq_u32 s6, 6
+; GENERIC-NEXT:    v_mov_b32_e32 v0, s5
+; GENERIC-NEXT:    s_cselect_b32 s5, 16, 6
 ; GENERIC-NEXT:    s_cmp_eq_u32 s6, 5
-; GENERIC-NEXT:    v_mov_b32_e32 v7, s5
-; GENERIC-NEXT:    s_cselect_b32 s5, 16, 5
+; GENERIC-NEXT:    v_mov_b32_e32 v7, s4
+; GENERIC-NEXT:    s_cselect_b32 s4, 16, 5
 ; GENERIC-NEXT:    s_cmp_eq_u32 s6, 4
-; GENERIC-NEXT:    v_mov_b32_e32 v6, s4
-; GENERIC-NEXT:    s_cselect_b32 s4, 16, 4
+; GENERIC-NEXT:    v_mov_b32_e32 v6, s5
+; GENERIC-NEXT:    s_cselect_b32 s5, 16, 4
 ; GENERIC-NEXT:    s_cmp_eq_u32 s6, 11
-; GENERIC-NEXT:    v_mov_b32_e32 v5, s5
-; GENERIC-NEXT:    s_cselect_b32 s5, 16, 11
+; GENERIC-NEXT:    v_mov_b32_e32 v5, s4
+; GENERIC-NEXT:    s_cselect_b32 s4, 16, 11
 ; GENERIC-NEXT:    s_cmp_eq_u32 s6, 10
-; GENERIC-NEXT:    v_mov_b32_e32 v4, s4
+; GENERIC-NEXT:    v_mov_b32_e32 v4, s5
 ; GENERIC-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
-; GENERIC-NEXT:    s_cselect_b32 s4, 16, 10
+; GENERIC-NEXT:    s_cselect_b32 s5, 16, 10
 ; GENERIC-NEXT:    s_cmp_eq_u32 s6, 9
 ; GENERIC-NEXT:    s_waitcnt expcnt(0)
-; GENERIC-NEXT:    v_mov_b32_e32 v7, s5
-; GENERIC-NEXT:    s_cselect_b32 s5, 16, 9
+; GENERIC-NEXT:    v_mov_b32_e32 v7, s4
+; GENERIC-NEXT:    s_cselect_b32 s4, 16, 9
 ; GENERIC-NEXT:    s_cmp_eq_u32 s6, 8
-; GENERIC-NEXT:    v_mov_b32_e32 v6, s4
-; GENERIC-NEXT:    s_cselect_b32 s4, 16, 8
+; GENERIC-NEXT:    v_mov_b32_e32 v6, s5
+; GENERIC-NEXT:    s_cselect_b32 s5, 16, 8
 ; GENERIC-NEXT:    s_cmp_eq_u32 s6, 15
-; GENERIC-NEXT:    v_mov_b32_e32 v5, s5
-; GENERIC-NEXT:    s_cselect_b32 s5, 16, 15
+; GENERIC-NEXT:    v_mov_b32_e32 v5, s4
+; GENERIC-NEXT:    s_cselect_b32 s4, 16, 15
 ; GENERIC-NEXT:    s_cmp_eq_u32 s6, 14
-; GENERIC-NEXT:    v_mov_b32_e32 v4, s4
-; GENERIC-NEXT:    s_cselect_b32 s4, 16, 14
+; GENERIC-NEXT:    v_mov_b32_e32 v4, s5
+; GENERIC-NEXT:    s_cselect_b32 s5, 16, 14
 ; GENERIC-NEXT:    s_cmp_eq_u32 s6, 13
 ; GENERIC-NEXT:    s_cselect_b32 s7, 16, 13
 ; GENERIC-NEXT:    s_cmp_eq_u32 s6, 12
 ; GENERIC-NEXT:    s_cselect_b32 s6, 16, 12
 ; GENERIC-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:32
 ; GENERIC-NEXT:    s_waitcnt expcnt(0)
-; GENERIC-NEXT:    v_mov_b32_e32 v7, s5
-; GENERIC-NEXT:    v_mov_b32_e32 v6, s4
+; GENERIC-NEXT:    v_mov_b32_e32 v7, s4
+; GENERIC-NEXT:    v_mov_b32_e32 v6, s5
 ; GENERIC-NEXT:    v_mov_b32_e32 v5, s7
 ; GENERIC-NEXT:    v_mov_b32_e32 v4, s6
 ; GENERIC-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:48
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log.ll b/llvm/test/CodeGen/AMDGPU/llvm.log.ll
index b850428a03c05..87a659de7c95f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log.ll
@@ -14,30 +14,31 @@
 define amdgpu_kernel void @s_log_f32(ptr addrspace(1) %out, float %in) {
 ; SI-SDAG-LABEL: s_log_f32:
 ; SI-SDAG:       ; %bb.0:
-; SI-SDAG-NEXT:    s_load_dword s0, s[4:5], 0xb
-; SI-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9
+; SI-SDAG-NEXT:    s_load_dword s6, s[4:5], 0xb
+; SI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
-; SI-SDAG-NEXT:    s_mov_b32 s1, 0x3377d1cf
-; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
-; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
-; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, s0, v0
-; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
-; SI-SDAG-NEXT:    s_mov_b32 s0, 0x3f317217
-; SI-SDAG-NEXT:    s_mov_b32 s6, -1
-; SI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v0
-; SI-SDAG-NEXT:    v_fma_f32 v2, v0, s0, -v1
-; SI-SDAG-NEXT:    v_fma_f32 v2, v0, s1, v2
-; SI-SDAG-NEXT:    s_mov_b32 s0, 0x7f800000
-; SI-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v0|, s0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s[0:1]
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x41b17218
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
-; SI-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
-; SI-SDAG-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; SI-SDAG-NEXT:    s_mov_b32 s4, 0x3f317217
+; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v0
+; SI-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; SI-SDAG-NEXT:    s_cselect_b32 s2, 32, 0
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; SI-SDAG-NEXT:    v_mov_b32_e32 v1, s2
+; SI-SDAG-NEXT:    v_ldexp_f32_e32 v1, s6, v1
+; SI-SDAG-NEXT:    v_log_f32_e32 v1, v1
+; SI-SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; SI-SDAG-NEXT:    s_mov_b32 s2, -1
+; SI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3f317217, v1
+; SI-SDAG-NEXT:    v_fma_f32 v3, v1, s4, -v2
+; SI-SDAG-NEXT:    s_mov_b32 s4, 0x3377d1cf
+; SI-SDAG-NEXT:    v_fma_f32 v3, v1, s4, v3
+; SI-SDAG-NEXT:    s_mov_b32 s4, 0x7f800000
+; SI-SDAG-NEXT:    v_add_f32_e32 v2, v2, v3
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, s4
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; SI-SDAG-NEXT:    v_sub_f32_e32 v0, v1, v0
+; SI-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-SDAG-NEXT:    s_endpgm
 ;
 ; SI-GISEL-LABEL: s_log_f32:
@@ -70,32 +71,34 @@ define amdgpu_kernel void @s_log_f32(ptr addrspace(1) %out, float %in) {
 ;
 ; VI-SDAG-LABEL: s_log_f32:
 ; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x2c
-; VI-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
+; VI-SDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
-; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
-; VI-SDAG-NEXT:    v_ldexp_f32 v0, s0, v0
-; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
-; VI-SDAG-NEXT:    s_mov_b32 s0, 0x7f800000
-; VI-SDAG-NEXT:    v_and_b32_e32 v1, 0xfffff000, v0
-; VI-SDAG-NEXT:    v_sub_f32_e32 v2, v0, v1
-; VI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3805fdf4, v1
-; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3f317000, v2
-; VI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3805fdf4, v2
-; VI-SDAG-NEXT:    v_add_f32_e32 v2, v3, v2
-; VI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3f317000, v1
-; VI-SDAG-NEXT:    v_add_f32_e32 v2, v4, v2
-; VI-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v0|, s0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s[0:1]
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x41b17218
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
-; VI-SDAG-NEXT:    v_sub_f32_e32 v2, v0, v1
-; VI-SDAG-NEXT:    v_mov_b32_e32 v0, s2
-; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
+; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
+; VI-SDAG-NEXT:    s_and_b64 s[0:1], vcc, exec
+; VI-SDAG-NEXT:    s_cselect_b32 s0, 32, 0
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s0
+; VI-SDAG-NEXT:    v_ldexp_f32 v1, s2, v1
+; VI-SDAG-NEXT:    v_log_f32_e32 v1, v1
+; VI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-SDAG-NEXT:    s_mov_b32 s2, 0x7f800000
+; VI-SDAG-NEXT:    v_and_b32_e32 v2, 0xfffff000, v1
+; VI-SDAG-NEXT:    v_sub_f32_e32 v3, v1, v2
+; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3805fdf4, v2
+; VI-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3f317000, v3
+; VI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3805fdf4, v3
+; VI-SDAG-NEXT:    v_add_f32_e32 v3, v4, v3
+; VI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3f317000, v2
+; VI-SDAG-NEXT:    v_add_f32_e32 v3, v5, v3
+; VI-SDAG-NEXT:    v_add_f32_e32 v2, v2, v3
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, s2
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; VI-SDAG-NEXT:    v_sub_f32_e32 v2, v1, v0
+; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
 ; VI-SDAG-NEXT:    s_endpgm
 ;
@@ -132,29 +135,30 @@ define amdgpu_kernel void @s_log_f32(ptr addrspace(1) %out, float %in) {
 ;
 ; GFX900-SDAG-LABEL: s_log_f32:
 ; GFX900-SDAG:       ; %bb.0:
-; GFX900-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x2c
-; GFX900-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
-; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x800000
-; GFX900-SDAG-NEXT:    s_mov_b32 s1, 0x3377d1cf
-; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x7f800000
+; GFX900-SDAG-NEXT:    s_load_dword s6, s[4:5], 0x2c
+; GFX900-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x41b17218
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX900-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v1
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; GFX900-SDAG-NEXT:    v_ldexp_f32 v1, s0, v1
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v0
+; GFX900-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; GFX900-SDAG-NEXT:    s_cselect_b32 s2, 32, 0
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, s2
+; GFX900-SDAG-NEXT:    v_ldexp_f32 v1, s6, v1
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v1, v1
-; GFX900-SDAG-NEXT:    s_mov_b32 s0, 0x3f317217
-; GFX900-SDAG-NEXT:    v_mov_b32_e32 v0, 0
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3f317217, v1
-; GFX900-SDAG-NEXT:    v_fma_f32 v3, v1, s0, -v2
-; GFX900-SDAG-NEXT:    v_fma_f32 v3, v1, s1, v3
-; GFX900-SDAG-NEXT:    v_add_f32_e32 v2, v2, v3
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v1|, s4
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[0:1]
-; GFX900-SDAG-NEXT:    v_mov_b32_e32 v2, 0x41b17218
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; GFX900-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v2
-; GFX900-SDAG-NEXT:    global_store_dword v0, v1, s[2:3]
+; GFX900-SDAG-NEXT:    s_mov_b32 s2, 0x3f317217
+; GFX900-SDAG-NEXT:    s_mov_b32 s3, 0x3377d1cf
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3f317217, v1
+; GFX900-SDAG-NEXT:    v_fma_f32 v4, v1, s2, -v3
+; GFX900-SDAG-NEXT:    v_fma_f32 v4, v1, s3, v4
+; GFX900-SDAG-NEXT:    s_mov_b32 s2, 0x7f800000
+; GFX900-SDAG-NEXT:    v_add_f32_e32 v3, v3, v4
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, s2
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX900-SDAG-NEXT:    v_sub_f32_e32 v0, v1, v0
+; GFX900-SDAG-NEXT:    global_store_dword v2, v0, s[0:1]
 ; GFX900-SDAG-NEXT:    s_endpgm
 ;
 ; GFX900-GISEL-LABEL: s_log_f32:
@@ -188,26 +192,25 @@ define amdgpu_kernel void @s_log_f32(ptr addrspace(1) %out, float %in) {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_load_b32 s0, s[4:5], 0x2c
 ; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s2, 0x800000, s0
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s2
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, s0, v0
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s1, 0x800000, s0
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 0x41b17218, s1
+; GFX1100-SDAG-NEXT:    s_and_b32 s1, s1, exec_lo
+; GFX1100-SDAG-NEXT:    s_cselect_b32 s1, 32, 0
+; GFX1100-SDAG-NEXT:    v_ldexp_f32 v1, s0, s1
 ; GFX1100-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT:    v_log_f32_e32 v1, v1
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v0
-; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
+; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3f317217, v1
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1|
+; GFX1100-SDAG-NEXT:    v_fma_f32 v3, 0x3f317217, v1, -v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_dual_add_f32 v1, v1, v2 :: v_dual_mov_b32 v2, 0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x41b17218, s2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v3, v1, 0x3377d1cf, v3
+; GFX1100-SDAG-NEXT:    v_add_f32_e32 v2, v2, v3
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_dual_cndmask_b32 v1, v1, v2 :: v_dual_mov_b32 v2, 0
+; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v1, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
 ; GFX1100-SDAG-NEXT:    s_endpgm
@@ -316,44 +319,46 @@ define amdgpu_kernel void @s_log_f32(ptr addrspace(1) %out, float %in) {
 define amdgpu_kernel void @s_log_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
 ; SI-SDAG-LABEL: s_log_v2f32:
 ; SI-SDAG:       ; %bb.0:
-; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x9
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
+; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x41b17218
 ; SI-SDAG-NEXT:    s_mov_b32 s8, 0x3377d1cf
 ; SI-SDAG-NEXT:    s_mov_b32 s9, 0x7f800000
-; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; SI-SDAG-NEXT:    v_ldexp_f32_e32 v1, s3, v1
-; SI-SDAG-NEXT:    v_log_f32_e32 v1, v1
-; SI-SDAG-NEXT:    s_mov_b32 s3, 0x3f317217
-; SI-SDAG-NEXT:    s_mov_b32 s4, s0
-; SI-SDAG-NEXT:    s_mov_b32 s5, s1
-; SI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3f317217, v1
-; SI-SDAG-NEXT:    v_fma_f32 v3, v1, s3, -v2
-; SI-SDAG-NEXT:    v_fma_f32 v3, v1, s8, v3
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s7, v0
+; SI-SDAG-NEXT:    s_and_b64 s[0:1], vcc, exec
+; SI-SDAG-NEXT:    s_cselect_b32 s0, 32, 0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v3, s0
+; SI-SDAG-NEXT:    v_ldexp_f32_e32 v3, s7, v3
+; SI-SDAG-NEXT:    v_log_f32_e32 v3, v3
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v0
+; SI-SDAG-NEXT:    s_mov_b32 s0, s4
+; SI-SDAG-NEXT:    s_mov_b32 s1, s5
+; SI-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
+; SI-SDAG-NEXT:    s_mov_b32 s7, 0x3f317217
+; SI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3f317217, v3
+; SI-SDAG-NEXT:    s_cselect_b32 s4, 32, 0
+; SI-SDAG-NEXT:    v_fma_f32 v5, v3, s7, -v4
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; SI-SDAG-NEXT:    v_mov_b32_e32 v1, s4
+; SI-SDAG-NEXT:    v_fma_f32 v5, v3, s8, v5
+; SI-SDAG-NEXT:    v_ldexp_f32_e32 v1, s6, v1
+; SI-SDAG-NEXT:    v_add_f32_e32 v4, v4, v5
+; SI-SDAG-NEXT:    v_log_f32_e32 v5, v1
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v3|, s9
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
+; SI-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3f317217, v5
+; SI-SDAG-NEXT:    v_fma_f32 v3, v5, s7, -v2
+; SI-SDAG-NEXT:    v_fma_f32 v3, v5, s8, v3
 ; SI-SDAG-NEXT:    v_add_f32_e32 v2, v2, v3
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v1|, s9
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[0:1]
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], s2, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
-; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, s2, v0
-; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
-; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x41b17218
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v3, 0, v2, vcc
-; SI-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v3
-; SI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3f317217, v0
-; SI-SDAG-NEXT:    v_fma_f32 v4, v0, s3, -v3
-; SI-SDAG-NEXT:    v_fma_f32 v4, v0, s8, v4
-; SI-SDAG-NEXT:    v_add_f32_e32 v3, v3, v4
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s9
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s[0:1]
-; SI-SDAG-NEXT:    s_mov_b32 s6, -1
-; SI-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v2
-; SI-SDAG-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v5|, s9
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc
+; SI-SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; SI-SDAG-NEXT:    s_mov_b32 s2, -1
+; SI-SDAG-NEXT:    v_sub_f32_e32 v0, v2, v0
+; SI-SDAG-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; SI-SDAG-NEXT:    s_endpgm
 ;
 ; SI-GISEL-LABEL: s_log_v2f32:
@@ -398,49 +403,51 @@ define amdgpu_kernel void @s_log_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
 ;
 ; VI-SDAG-LABEL: s_log_v2f32:
 ; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x24
+; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
-; VI-SDAG-NEXT:    s_mov_b32 s2, 0x7f800000
+; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x41b17218
 ; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s7, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; VI-SDAG-NEXT:    v_ldexp_f32 v1, s7, v1
-; VI-SDAG-NEXT:    v_log_f32_e32 v1, v1
-; VI-SDAG-NEXT:    v_and_b32_e32 v2, 0xfffff000, v1
-; VI-SDAG-NEXT:    v_sub_f32_e32 v3, v1, v2
-; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3805fdf4, v2
-; VI-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3f317000, v3
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v0
+; VI-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
+; VI-SDAG-NEXT:    s_cselect_b32 s4, 32, 0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s4
+; VI-SDAG-NEXT:    v_ldexp_f32 v3, s3, v3
+; VI-SDAG-NEXT:    v_log_f32_e32 v3, v3
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
+; VI-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
+; VI-SDAG-NEXT:    v_and_b32_e32 v4, 0xfffff000, v3
+; VI-SDAG-NEXT:    v_sub_f32_e32 v5, v3, v4
+; VI-SDAG-NEXT:    v_mul_f32_e32 v6, 0x3805fdf4, v4
+; VI-SDAG-NEXT:    v_mul_f32_e32 v7, 0x3f317000, v5
+; VI-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3805fdf4, v5
+; VI-SDAG-NEXT:    s_cselect_b32 s4, 32, 0
+; VI-SDAG-NEXT:    v_add_f32_e32 v5, v6, v5
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s4
+; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3f317000, v4
+; VI-SDAG-NEXT:    v_add_f32_e32 v5, v7, v5
+; VI-SDAG-NEXT:    v_ldexp_f32 v1, s2, v1
+; VI-SDAG-NEXT:    v_add_f32_e32 v4, v4, v5
+; VI-SDAG-NEXT:    v_log_f32_e32 v5, v1
+; VI-SDAG-NEXT:    s_mov_b32 s3, 0x7f800000
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v3|, s3
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
+; VI-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v2
+; VI-SDAG-NEXT:    v_and_b32_e32 v2, 0xfffff000, v5
+; VI-SDAG-NEXT:    v_sub_f32_e32 v3, v5, v2
+; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3f317000, v3
 ; VI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3805fdf4, v3
+; VI-SDAG-NEXT:    v_mul_f32_e32 v6, 0x3805fdf4, v2
+; VI-SDAG-NEXT:    v_add_f32_e32 v3, v6, v3
 ; VI-SDAG-NEXT:    v_add_f32_e32 v3, v4, v3
 ; VI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3f317000, v2
-; VI-SDAG-NEXT:    v_add_f32_e32 v3, v5, v3
 ; VI-SDAG-NEXT:    v_add_f32_e32 v2, v2, v3
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v1|, s2
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[0:1]
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], s6, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
-; VI-SDAG-NEXT:    v_ldexp_f32 v0, s6, v0
-; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
-; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x41b17218
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v3, 0, v2, vcc
-; VI-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v3
-; VI-SDAG-NEXT:    v_and_b32_e32 v3, 0xfffff000, v0
-; VI-SDAG-NEXT:    v_sub_f32_e32 v4, v0, v3
-; VI-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3f317000, v4
-; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3805fdf4, v4
-; VI-SDAG-NEXT:    v_mul_f32_e32 v6, 0x3805fdf4, v3
-; VI-SDAG-NEXT:    v_add_f32_e32 v4, v6, v4
-; VI-SDAG-NEXT:    v_add_f32_e32 v4, v5, v4
-; VI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3f317000, v3
-; VI-SDAG-NEXT:    v_add_f32_e32 v3, v3, v4
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s2
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s[0:1]
-; VI-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v2
-; VI-SDAG-NEXT:    v_mov_b32_e32 v2, s4
-; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v5|, s3
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc
+; VI-SDAG-NEXT:    v_sub_f32_e32 v0, v2, v0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s1
+; VI-SDAG-NEXT:    v_mov_b32_e32 v2, s0
 ; VI-SDAG-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
 ; VI-SDAG-NEXT:    s_endpgm
 ;
@@ -494,41 +501,43 @@ define amdgpu_kernel void @s_log_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
 ;
 ; GFX900-SDAG-LABEL: s_log_v2f32:
 ; GFX900-SDAG:       ; %bb.0:
-; GFX900-SDAG-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x24
+; GFX900-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
-; GFX900-SDAG-NEXT:    s_mov_b32 s2, 0x3f317217
-; GFX900-SDAG-NEXT:    s_mov_b32 s3, 0x3377d1cf
-; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x7f800000
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x41b17218
+; GFX900-SDAG-NEXT:    s_mov_b32 s6, 0x3377d1cf
+; GFX900-SDAG-NEXT:    s_mov_b32 s7, 0x7f800000
 ; GFX900-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s11, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; GFX900-SDAG-NEXT:    v_ldexp_f32 v1, s11, v1
-; GFX900-SDAG-NEXT:    v_log_f32_e32 v1, v1
-; GFX900-SDAG-NEXT:    v_mov_b32_e32 v2, 0
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3f317217, v1
-; GFX900-SDAG-NEXT:    v_fma_f32 v4, v1, s2, -v3
-; GFX900-SDAG-NEXT:    v_fma_f32 v4, v1, s3, v4
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v0
+; GFX900-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
+; GFX900-SDAG-NEXT:    s_cselect_b32 s4, 32, 0
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v4, s4
+; GFX900-SDAG-NEXT:    v_ldexp_f32 v4, s3, v4
+; GFX900-SDAG-NEXT:    v_log_f32_e32 v4, v4
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v3, 0, v1, vcc
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
+; GFX900-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
+; GFX900-SDAG-NEXT:    s_mov_b32 s3, 0x3f317217
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3f317217, v4
+; GFX900-SDAG-NEXT:    s_cselect_b32 s4, 32, 0
+; GFX900-SDAG-NEXT:    v_fma_f32 v6, v4, s3, -v5
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, s4
+; GFX900-SDAG-NEXT:    v_fma_f32 v6, v4, s6, v6
+; GFX900-SDAG-NEXT:    v_ldexp_f32 v1, s2, v1
+; GFX900-SDAG-NEXT:    v_add_f32_e32 v5, v5, v6
+; GFX900-SDAG-NEXT:    v_log_f32_e32 v6, v1
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v4|, s7
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v1, v4, v5, vcc
+; GFX900-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v3
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3f317217, v6
+; GFX900-SDAG-NEXT:    v_fma_f32 v4, v6, s3, -v3
+; GFX900-SDAG-NEXT:    v_fma_f32 v4, v6, s6, v4
 ; GFX900-SDAG-NEXT:    v_add_f32_e32 v3, v3, v4
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v1|, s4
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s[0:1]
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], s10, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
-; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, s10, v0
-; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
-; GFX900-SDAG-NEXT:    v_mov_b32_e32 v3, 0x41b17218
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v3, vcc
-; GFX900-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v4
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3f317217, v0
-; GFX900-SDAG-NEXT:    v_fma_f32 v5, v0, s2, -v4
-; GFX900-SDAG-NEXT:    v_fma_f32 v5, v0, s3, v5
-; GFX900-SDAG-NEXT:    v_add_f32_e32 v4, v4, v5
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, v3, s[0:1]
-; GFX900-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v3
-; GFX900-SDAG-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v6|, s7
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; GFX900-SDAG-NEXT:    v_sub_f32_e32 v0, v3, v0
+; GFX900-SDAG-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX900-SDAG-NEXT:    s_endpgm
 ;
 ; GFX900-GISEL-LABEL: s_log_v2f32:
@@ -574,39 +583,37 @@ define amdgpu_kernel void @s_log_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
 ; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s5, 0x800000, s2
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s4, 0x800000, s3
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s5
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; GFX1100-SDAG-NEXT:    v_ldexp_f32 v1, s2, v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s5, 0x800000, s2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 0x41b17218, s4
+; GFX1100-SDAG-NEXT:    s_and_b32 s4, s4, exec_lo
+; GFX1100-SDAG-NEXT:    s_cselect_b32 s4, 32, 0
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 0x41b17218, s5
+; GFX1100-SDAG-NEXT:    v_ldexp_f32 v1, s3, s4
+; GFX1100-SDAG-NEXT:    s_and_b32 s5, s5, exec_lo
+; GFX1100-SDAG-NEXT:    s_cselect_b32 s5, 32, 0
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT:    v_ldexp_f32 v3, s2, s5
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v1, v1
-; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_dual_mul_f32 v3, 0x3f317217, v1 :: v_dual_lshlrev_b32 v0, 5, v0
-; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, s3, v0
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1100-SDAG-NEXT:    v_fma_f32 v5, 0x3f317217, v1, -v3
-; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_3)
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v5, 0x3377d1cf, v1
+; GFX1100-SDAG-NEXT:    v_log_f32_e32 v3, v3
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_dual_add_f32 v3, v3, v5 :: v_dual_mul_f32 v2, 0x3f317217, v0
-; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v5, 0, 0x41b17218, s5
-; GFX1100-SDAG-NEXT:    v_fma_f32 v4, 0x3f317217, v0, -v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v4, 0x3377d1cf, v0
-; GFX1100-SDAG-NEXT:    v_add_f32_e32 v2, v2, v4
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 0x41b17218, s4
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3f317217, v1
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1|
-; GFX1100-SDAG-NEXT:    v_dual_cndmask_b32 v2, v1, v3 :: v_dual_mov_b32 v3, 0
-; GFX1100-SDAG-NEXT:    v_dual_sub_f32 v1, v0, v4 :: v_dual_sub_f32 v0, v2, v5
-; GFX1100-SDAG-NEXT:    global_store_b64 v3, v[0:1], s[0:1]
+; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3f317217, v3
+; GFX1100-SDAG-NEXT:    v_fma_f32 v6, 0x3f317217, v1, -v4
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_fma_f32 v7, 0x3f317217, v3, -v5
+; GFX1100-SDAG-NEXT:    v_dual_fmac_f32 v6, 0x3377d1cf, v1 :: v_dual_fmac_f32 v7, 0x3377d1cf, v3
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_dual_add_f32 v4, v4, v6 :: v_dual_add_f32 v5, v5, v7
+; GFX1100-SDAG-NEXT:    v_dual_cndmask_b32 v1, v1, v4 :: v_dual_mov_b32 v4, 0
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v3|
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc_lo
+; GFX1100-SDAG-NEXT:    v_dual_sub_f32 v1, v1, v0 :: v_dual_sub_f32 v0, v3, v2
+; GFX1100-SDAG-NEXT:    global_store_b64 v4, v[0:1], s[0:1]
 ; GFX1100-SDAG-NEXT:    s_endpgm
 ;
 ; GFX1100-GISEL-LABEL: s_log_v2f32:
@@ -762,56 +769,59 @@ define amdgpu_kernel void @s_log_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
 ; SI-SDAG-LABEL: s_log_v3f32:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0xd
-; SI-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9
+; SI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
+; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x41b17218
+; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3f317217
 ; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-SDAG-NEXT:    s_mov_b32 s11, 0x3377d1cf
-; SI-SDAG-NEXT:    s_mov_b32 s12, 0x7f800000
 ; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s9, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; SI-SDAG-NEXT:    v_ldexp_f32_e32 v1, s9, v1
-; SI-SDAG-NEXT:    v_log_f32_e32 v1, v1
-; SI-SDAG-NEXT:    s_mov_b32 s9, 0x3f317217
-; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
-; SI-SDAG-NEXT:    s_mov_b32 s6, -1
-; SI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3f317217, v1
-; SI-SDAG-NEXT:    v_fma_f32 v3, v1, s9, -v2
-; SI-SDAG-NEXT:    v_fma_f32 v3, v1, s11, v3
-; SI-SDAG-NEXT:    v_add_f32_e32 v2, v2, v3
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v1|, s12
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[0:1]
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], s8, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[0:1]
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
-; SI-SDAG-NEXT:    v_ldexp_f32_e32 v2, s8, v2
-; SI-SDAG-NEXT:    v_log_f32_e32 v2, v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v3, 0x41b17218
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v3, vcc
+; SI-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; SI-SDAG-NEXT:    s_cselect_b32 s2, 32, 0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v3, s2
+; SI-SDAG-NEXT:    v_ldexp_f32_e32 v3, s9, v3
+; SI-SDAG-NEXT:    v_log_f32_e32 v3, v3
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s8, v0
+; SI-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
+; SI-SDAG-NEXT:    s_cselect_b32 s4, 32, 0
+; SI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3f317217, v3
+; SI-SDAG-NEXT:    v_mov_b32_e32 v6, s4
+; SI-SDAG-NEXT:    v_fma_f32 v5, v3, s6, -v4
+; SI-SDAG-NEXT:    s_mov_b32 s7, 0x3377d1cf
+; SI-SDAG-NEXT:    v_ldexp_f32_e32 v6, s8, v6
+; SI-SDAG-NEXT:    v_fma_f32 v5, v3, s7, v5
+; SI-SDAG-NEXT:    s_mov_b32 s9, 0x7f800000
+; SI-SDAG-NEXT:    v_log_f32_e32 v6, v6
+; SI-SDAG-NEXT:    v_add_f32_e32 v4, v4, v5
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v2, vcc
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v3|, s9
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s10, v0
-; SI-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v4
-; SI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3f317217, v2
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SI-SDAG-NEXT:    v_fma_f32 v5, v2, s9, -v4
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
-; SI-SDAG-NEXT:    v_fma_f32 v5, v2, s11, v5
+; SI-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
+; SI-SDAG-NEXT:    v_sub_f32_e32 v1, v3, v1
+; SI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3f317217, v6
+; SI-SDAG-NEXT:    s_cselect_b32 s4, 32, 0
+; SI-SDAG-NEXT:    v_fma_f32 v4, v6, s6, -v3
+; SI-SDAG-NEXT:    v_mov_b32_e32 v0, s4
+; SI-SDAG-NEXT:    v_fma_f32 v4, v6, s7, v4
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, s10, v0
-; SI-SDAG-NEXT:    v_add_f32_e32 v4, v4, v5
-; SI-SDAG-NEXT:    v_log_f32_e32 v5, v0
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[2:3], |v2|, s12
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v0, v2, v4, s[2:3]
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, v3, s[0:1]
-; SI-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v2
-; SI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3f317217, v5
-; SI-SDAG-NEXT:    v_fma_f32 v4, v5, s9, -v2
-; SI-SDAG-NEXT:    v_fma_f32 v4, v5, s11, v4
-; SI-SDAG-NEXT:    v_add_f32_e32 v2, v2, v4
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v5|, s12
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, v5, v2, s[0:1]
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
-; SI-SDAG-NEXT:    v_sub_f32_e32 v2, v2, v3
-; SI-SDAG-NEXT:    buffer_store_dword v2, off, s[4:7], 0 offset:8
-; SI-SDAG-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; SI-SDAG-NEXT:    v_add_f32_e32 v3, v3, v4
+; SI-SDAG-NEXT:    v_log_f32_e32 v4, v0
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v6|, s9
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v6, v3, vcc
+; SI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3f317217, v4
+; SI-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v5
+; SI-SDAG-NEXT:    v_fma_f32 v5, v4, s6, -v3
+; SI-SDAG-NEXT:    v_fma_f32 v5, v4, s7, v5
+; SI-SDAG-NEXT:    v_add_f32_e32 v3, v3, v5
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v4|, s9
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
+; SI-SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; SI-SDAG-NEXT:    s_mov_b32 s2, -1
+; SI-SDAG-NEXT:    v_sub_f32_e32 v2, v3, v2
+; SI-SDAG-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:8
+; SI-SDAG-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; SI-SDAG-NEXT:    s_endpgm
 ;
 ; SI-GISEL-LABEL: s_log_v3f32:
@@ -871,55 +881,59 @@ define amdgpu_kernel void @s_log_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
 ;
 ; VI-SDAG-LABEL: s_log_v3f32:
 ; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x34
+; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
-; VI-SDAG-NEXT:    s_mov_b32 s6, 0x7f800000
-; VI-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x24
+; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x41b17218
 ; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s10, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; VI-SDAG-NEXT:    v_ldexp_f32 v1, s10, v1
-; VI-SDAG-NEXT:    v_log_f32_e32 v1, v1
-; VI-SDAG-NEXT:    v_and_b32_e32 v2, 0xfffff000, v1
-; VI-SDAG-NEXT:    v_sub_f32_e32 v3, v1, v2
-; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3805fdf4, v2
-; VI-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3f317000, v3
-; VI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3805fdf4, v3
-; VI-SDAG-NEXT:    v_add_f32_e32 v3, v4, v3
-; VI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3f317000, v2
-; VI-SDAG-NEXT:    v_add_f32_e32 v3, v5, v3
-; VI-SDAG-NEXT:    v_add_f32_e32 v2, v2, v3
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v1|, s6
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[0:1]
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], s9, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[0:1]
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
-; VI-SDAG-NEXT:    v_ldexp_f32 v2, s9, v2
-; VI-SDAG-NEXT:    v_log_f32_e32 v3, v2
-; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x41b17218
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v4, vcc
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s8, v0
-; VI-SDAG-NEXT:    v_sub_f32_e32 v2, v1, v2
-; VI-SDAG-NEXT:    v_and_b32_e32 v1, 0xfffff000, v3
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; VI-SDAG-NEXT:    v_sub_f32_e32 v5, v3, v1
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
-; VI-SDAG-NEXT:    v_mul_f32_e32 v6, 0x3f317000, v5
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
+; VI-SDAG-NEXT:    s_and_b64 s[6:7], vcc, exec
+; VI-SDAG-NEXT:    s_cselect_b32 s3, 32, 0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s3
+; VI-SDAG-NEXT:    v_ldexp_f32 v3, s2, v3
+; VI-SDAG-NEXT:    v_log_f32_e32 v3, v3
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v0
+; VI-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
+; VI-SDAG-NEXT:    v_and_b32_e32 v4, 0xfffff000, v3
+; VI-SDAG-NEXT:    v_sub_f32_e32 v5, v3, v4
+; VI-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
+; VI-SDAG-NEXT:    v_mul_f32_e32 v6, 0x3805fdf4, v4
+; VI-SDAG-NEXT:    v_mul_f32_e32 v7, 0x3f317000, v5
 ; VI-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3805fdf4, v5
-; VI-SDAG-NEXT:    v_mul_f32_e32 v7, 0x3805fdf4, v1
-; VI-SDAG-NEXT:    v_ldexp_f32 v0, s8, v0
-; VI-SDAG-NEXT:    v_add_f32_e32 v5, v7, v5
-; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT:    s_cselect_b32 s4, 32, 0
 ; VI-SDAG-NEXT:    v_add_f32_e32 v5, v6, v5
-; VI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3f317000, v1
-; VI-SDAG-NEXT:    v_add_f32_e32 v1, v1, v5
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[2:3], |v3|, s6
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v3, v1, s[2:3]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, v4, s[0:1]
-; VI-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v3
-; VI-SDAG-NEXT:    v_and_b32_e32 v3, 0xfffff000, v0
-; VI-SDAG-NEXT:    v_sub_f32_e32 v5, v0, v3
+; VI-SDAG-NEXT:    v_mov_b32_e32 v6, s4
+; VI-SDAG-NEXT:    v_ldexp_f32 v6, s1, v6
+; VI-SDAG-NEXT:    v_log_f32_e32 v6, v6
+; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3f317000, v4
+; VI-SDAG-NEXT:    v_add_f32_e32 v5, v7, v5
+; VI-SDAG-NEXT:    s_mov_b32 s6, 0x7f800000
+; VI-SDAG-NEXT:    v_add_f32_e32 v4, v4, v5
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v1, vcc
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v3|, s6
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
+; VI-SDAG-NEXT:    v_sub_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT:    v_and_b32_e32 v3, 0xfffff000, v6
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v0
+; VI-SDAG-NEXT:    v_sub_f32_e32 v4, v6, v3
+; VI-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
+; VI-SDAG-NEXT:    v_mul_f32_e32 v7, 0x3f317000, v4
+; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3805fdf4, v4
+; VI-SDAG-NEXT:    v_mul_f32_e32 v8, 0x3805fdf4, v3
+; VI-SDAG-NEXT:    s_cselect_b32 s1, 32, 0
+; VI-SDAG-NEXT:    v_add_f32_e32 v4, v8, v4
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; VI-SDAG-NEXT:    v_add_f32_e32 v4, v7, v4
+; VI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3f317000, v3
+; VI-SDAG-NEXT:    v_ldexp_f32 v1, s0, v1
+; VI-SDAG-NEXT:    v_add_f32_e32 v3, v3, v4
+; VI-SDAG-NEXT:    v_log_f32_e32 v4, v1
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v6|, s6
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
+; VI-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v5
+; VI-SDAG-NEXT:    v_and_b32_e32 v3, 0xfffff000, v4
+; VI-SDAG-NEXT:    v_sub_f32_e32 v5, v4, v3
 ; VI-SDAG-NEXT:    v_mul_f32_e32 v6, 0x3f317000, v5
 ; VI-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3805fdf4, v5
 ; VI-SDAG-NEXT:    v_mul_f32_e32 v7, 0x3805fdf4, v3
@@ -927,12 +941,12 @@ define amdgpu_kernel void @s_log_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
 ; VI-SDAG-NEXT:    v_add_f32_e32 v5, v6, v5
 ; VI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3f317000, v3
 ; VI-SDAG-NEXT:    v_add_f32_e32 v3, v3, v5
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v0|, s6
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v3, s[0:1]
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v3, 0, v4, vcc
-; VI-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v3
-; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s4
-; VI-SDAG-NEXT:    v_mov_b32_e32 v4, s5
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v4|, s6
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
+; VI-SDAG-NEXT:    v_sub_f32_e32 v0, v3, v0
+; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT:    v_mov_b32_e32 v4, s3
+; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s2
 ; VI-SDAG-NEXT:    flat_store_dwordx3 v[3:4], v[0:2]
 ; VI-SDAG-NEXT:    s_endpgm
 ;
@@ -1005,55 +1019,58 @@ define amdgpu_kernel void @s_log_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
 ;
 ; GFX900-SDAG-LABEL: s_log_v3f32:
 ; GFX900-SDAG:       ; %bb.0:
-; GFX900-SDAG-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x34
+; GFX900-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
 ; GFX900-SDAG-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x24
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x41b17218
+; GFX900-SDAG-NEXT:    s_mov_b32 s8, 0x7f800000
+; GFX900-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
+; GFX900-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
+; GFX900-SDAG-NEXT:    s_cselect_b32 s3, 32, 0
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v3, s3
+; GFX900-SDAG-NEXT:    v_ldexp_f32 v3, s2, v3
+; GFX900-SDAG-NEXT:    v_log_f32_e32 v3, v3
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v0
+; GFX900-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; GFX900-SDAG-NEXT:    s_cselect_b32 s2, 32, 0
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x3f317217
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3f317217, v3
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v7, s2
 ; GFX900-SDAG-NEXT:    s_mov_b32 s5, 0x3377d1cf
-; GFX900-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s10, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; GFX900-SDAG-NEXT:    v_ldexp_f32 v1, s10, v1
-; GFX900-SDAG-NEXT:    v_log_f32_e32 v1, v1
-; GFX900-SDAG-NEXT:    s_mov_b32 s10, 0x7f800000
-; GFX900-SDAG-NEXT:    v_mov_b32_e32 v5, 0x41b17218
-; GFX900-SDAG-NEXT:    v_mov_b32_e32 v3, 0
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3f317217, v1
-; GFX900-SDAG-NEXT:    v_fma_f32 v4, v1, s4, -v2
-; GFX900-SDAG-NEXT:    v_fma_f32 v4, v1, s5, v4
-; GFX900-SDAG-NEXT:    v_add_f32_e32 v2, v2, v4
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v1|, s10
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[0:1]
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], s9, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[0:1]
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
-; GFX900-SDAG-NEXT:    v_ldexp_f32 v2, s9, v2
-; GFX900-SDAG-NEXT:    v_log_f32_e32 v4, v2
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v5, vcc
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s8, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
-; GFX900-SDAG-NEXT:    v_sub_f32_e32 v2, v1, v2
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v4
-; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, s8, v0
-; GFX900-SDAG-NEXT:    v_fma_f32 v6, v4, s4, -v1
-; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
-; GFX900-SDAG-NEXT:    v_fma_f32 v6, v4, s5, v6
-; GFX900-SDAG-NEXT:    v_add_f32_e32 v1, v1, v6
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 s[2:3], |v4|, s10
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, v4, v1, s[2:3]
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, v5, s[0:1]
-; GFX900-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v4
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3f317217, v0
-; GFX900-SDAG-NEXT:    v_fma_f32 v6, v0, s4, -v4
-; GFX900-SDAG-NEXT:    v_fma_f32 v6, v0, s5, v6
-; GFX900-SDAG-NEXT:    v_add_f32_e32 v4, v4, v6
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v0|, s10
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s[0:1]
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v5, vcc
-; GFX900-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v4
-; GFX900-SDAG-NEXT:    global_store_dwordx3 v3, v[0:2], s[6:7]
+; GFX900-SDAG-NEXT:    v_fma_f32 v6, v3, s4, -v5
+; GFX900-SDAG-NEXT:    v_ldexp_f32 v7, s1, v7
+; GFX900-SDAG-NEXT:    v_fma_f32 v6, v3, s5, v6
+; GFX900-SDAG-NEXT:    v_log_f32_e32 v7, v7
+; GFX900-SDAG-NEXT:    v_add_f32_e32 v5, v5, v6
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v6, 0, v1, vcc
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v3|, s8
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v0
+; GFX900-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; GFX900-SDAG-NEXT:    v_sub_f32_e32 v2, v3, v2
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3f317217, v7
+; GFX900-SDAG-NEXT:    s_cselect_b32 s1, 32, 0
+; GFX900-SDAG-NEXT:    v_fma_f32 v5, v7, s4, -v3
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX900-SDAG-NEXT:    v_fma_f32 v5, v7, s5, v5
+; GFX900-SDAG-NEXT:    v_ldexp_f32 v1, s0, v1
+; GFX900-SDAG-NEXT:    v_add_f32_e32 v3, v3, v5
+; GFX900-SDAG-NEXT:    v_log_f32_e32 v5, v1
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v7|, s8
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX900-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v6
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3f317217, v5
+; GFX900-SDAG-NEXT:    v_fma_f32 v6, v5, s4, -v3
+; GFX900-SDAG-NEXT:    v_fma_f32 v6, v5, s5, v6
+; GFX900-SDAG-NEXT:    v_add_f32_e32 v3, v3, v6
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v5|, s8
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v4, 0
+; GFX900-SDAG-NEXT:    v_sub_f32_e32 v0, v3, v0
+; GFX900-SDAG-NEXT:    global_store_dwordx3 v4, v[0:2], s[6:7]
 ; GFX900-SDAG-NEXT:    s_endpgm
 ;
 ; GFX900-GISEL-LABEL: s_log_v3f32:
@@ -1113,60 +1130,52 @@ define amdgpu_kernel void @s_log_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x34
 ; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s7, 0x800000, s0
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s3, 0x800000, s2
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s6, 0x800000, s1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s7
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s3
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s6
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v9, 0, 0x41b17218, s3
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v10, 0, 0x41b17218, s6
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
-; GFX1100-SDAG-NEXT:    v_ldexp_f32 v2, s0, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s7, 0x800000, s0
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 0x41b17218, s3
+; GFX1100-SDAG-NEXT:    s_and_b32 s3, s3, exec_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x41b17218, s6
+; GFX1100-SDAG-NEXT:    s_cselect_b32 s3, 32, 0
+; GFX1100-SDAG-NEXT:    s_and_b32 s6, s6, exec_lo
+; GFX1100-SDAG-NEXT:    s_cselect_b32 s6, 32, 0
+; GFX1100-SDAG-NEXT:    v_ldexp_f32 v2, s2, s3
+; GFX1100-SDAG-NEXT:    v_ldexp_f32 v4, s1, s6
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, 0x41b17218, s7
+; GFX1100-SDAG-NEXT:    s_and_b32 s7, s7, exec_lo
+; GFX1100-SDAG-NEXT:    s_cselect_b32 s2, 32, 0
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v2, v2
-; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_dual_mul_f32 v5, 0x3f317217, v2 :: v_dual_lshlrev_b32 v0, 5, v0
-; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, s2, v0
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1100-SDAG-NEXT:    v_fma_f32 v8, 0x3f317217, v2, -v5
-; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v8, 0x3377d1cf, v2
-; GFX1100-SDAG-NEXT:    v_add_f32_e32 v5, v5, v8
-; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3f317217, v0
-; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_fma_f32 v6, 0x3f317217, v0, -v3
-; GFX1100-SDAG-NEXT:    v_dual_fmac_f32 v6, 0x3377d1cf, v0 :: v_dual_lshlrev_b32 v1, 5, v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-SDAG-NEXT:    v_ldexp_f32 v1, s1, v1
+; GFX1100-SDAG-NEXT:    v_log_f32_e32 v4, v4
+; GFX1100-SDAG-NEXT:    v_ldexp_f32 v5, s0, s2
 ; GFX1100-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1100-SDAG-NEXT:    v_add_f32_e32 v3, v3, v6
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_log_f32_e32 v1, v1
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v6, 0, 0x41b17218, s7
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT:    v_log_f32_e32 v5, v5
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3f317217, v1
-; GFX1100-SDAG-NEXT:    v_fma_f32 v7, 0x3f317217, v1, -v4
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v7, 0x3377d1cf, v1
-; GFX1100-SDAG-NEXT:    v_add_f32_e32 v4, v4, v7
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
-; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1|
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc_lo
+; GFX1100-SDAG-NEXT:    v_dual_mul_f32 v6, 0x3f317217, v2 :: v_dual_mul_f32 v7, 0x3f317217, v4
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2|
-; GFX1100-SDAG-NEXT:    v_dual_mov_b32 v4, 0 :: v_dual_sub_f32 v1, v1, v10
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v3, v2, v5, vcc_lo
-; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v2, v0, v9
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v3, v6
+; GFX1100-SDAG-NEXT:    v_fma_f32 v9, 0x3f317217, v2, -v6
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_fma_f32 v10, 0x3f317217, v4, -v7
+; GFX1100-SDAG-NEXT:    v_dual_fmac_f32 v9, 0x3377d1cf, v2 :: v_dual_fmac_f32 v10, 0x3377d1cf, v4
+; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v8, 0x3f317217, v5
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT:    v_dual_add_f32 v6, v6, v9 :: v_dual_add_f32 v7, v7, v10
+; GFX1100-SDAG-NEXT:    v_fma_f32 v11, 0x3f317217, v5, -v8
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v4|
+; GFX1100-SDAG-NEXT:    v_dual_fmac_f32 v11, 0x3377d1cf, v5 :: v_dual_sub_f32 v2, v2, v0
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc_lo
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX1100-SDAG-NEXT:    v_add_f32_e32 v8, v8, v11
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v5|
+; GFX1100-SDAG-NEXT:    v_dual_mov_b32 v6, 0 :: v_dual_sub_f32 v1, v4, v1
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc_lo
+; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v5, v3
 ; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1100-SDAG-NEXT:    global_store_b96 v4, v[0:2], s[0:1]
+; GFX1100-SDAG-NEXT:    global_store_b96 v6, v[0:2], s[0:1]
 ; GFX1100-SDAG-NEXT:    s_endpgm
 ;
 ; GFX1100-GISEL-LABEL: s_log_v3f32:
@@ -1387,68 +1396,72 @@ define amdgpu_kernel void @s_log_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; SI-SDAG-LABEL: s_log_v4f32:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0xd
-; SI-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9
+; SI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
-; SI-SDAG-NEXT:    s_mov_b32 s12, 0x3377d1cf
-; SI-SDAG-NEXT:    s_mov_b32 s13, 0x7f800000
+; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x41b17218
+; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3f317217
 ; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s11, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; SI-SDAG-NEXT:    v_ldexp_f32_e32 v1, s11, v1
-; SI-SDAG-NEXT:    v_log_f32_e32 v1, v1
-; SI-SDAG-NEXT:    s_mov_b32 s11, 0x3f317217
-; SI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x41b17218
-; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
-; SI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3f317217, v1
-; SI-SDAG-NEXT:    v_fma_f32 v3, v1, s11, -v2
-; SI-SDAG-NEXT:    v_fma_f32 v3, v1, s12, v3
-; SI-SDAG-NEXT:    v_add_f32_e32 v2, v2, v3
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v1|, s13
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[0:1]
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], s10, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[0:1]
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
-; SI-SDAG-NEXT:    v_ldexp_f32_e32 v2, s10, v2
-; SI-SDAG-NEXT:    v_log_f32_e32 v2, v2
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v3, 0, v4, vcc
-; SI-SDAG-NEXT:    v_sub_f32_e32 v3, v1, v3
+; SI-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; SI-SDAG-NEXT:    s_cselect_b32 s2, 32, 0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v3, s2
+; SI-SDAG-NEXT:    v_ldexp_f32_e32 v3, s11, v3
+; SI-SDAG-NEXT:    v_log_f32_e32 v3, v3
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s10, v0
+; SI-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
+; SI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3f317217, v3
+; SI-SDAG-NEXT:    s_cselect_b32 s4, 32, 0
+; SI-SDAG-NEXT:    v_fma_f32 v5, v3, s6, -v4
+; SI-SDAG-NEXT:    s_mov_b32 s7, 0x3377d1cf
+; SI-SDAG-NEXT:    v_mov_b32_e32 v6, s4
+; SI-SDAG-NEXT:    v_fma_f32 v5, v3, s7, v5
+; SI-SDAG-NEXT:    s_mov_b32 s11, 0x7f800000
+; SI-SDAG-NEXT:    v_ldexp_f32_e32 v6, s10, v6
+; SI-SDAG-NEXT:    v_add_f32_e32 v4, v4, v5
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v1, vcc
+; SI-SDAG-NEXT:    v_log_f32_e32 v6, v6
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v3|, s11
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s9, v0
-; SI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v2
-; SI-SDAG-NEXT:    v_fma_f32 v5, v2, s11, -v1
-; SI-SDAG-NEXT:    v_fma_f32 v5, v2, s12, v5
-; SI-SDAG-NEXT:    v_add_f32_e32 v1, v1, v5
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v5, 5, v5
-; SI-SDAG-NEXT:    v_ldexp_f32_e32 v5, s9, v5
-; SI-SDAG-NEXT:    v_log_f32_e32 v5, v5
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[2:3], |v2|, s13
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v2, v1, s[2:3]
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, v4, s[0:1]
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], s8, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
-; SI-SDAG-NEXT:    v_sub_f32_e32 v2, v1, v2
-; SI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v5
-; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, s8, v0
-; SI-SDAG-NEXT:    v_fma_f32 v6, v5, s11, -v1
-; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
-; SI-SDAG-NEXT:    v_fma_f32 v6, v5, s12, v6
-; SI-SDAG-NEXT:    v_add_f32_e32 v1, v1, v6
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[2:3], |v5|, s13
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v5, v1, s[2:3]
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v4, vcc
-; SI-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v5
-; SI-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3f317217, v0
-; SI-SDAG-NEXT:    v_fma_f32 v6, v0, s11, -v5
-; SI-SDAG-NEXT:    v_fma_f32 v6, v0, s12, v6
+; SI-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
+; SI-SDAG-NEXT:    s_cselect_b32 s4, 32, 0
+; SI-SDAG-NEXT:    v_sub_f32_e32 v3, v3, v2
+; SI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3f317217, v6
+; SI-SDAG-NEXT:    v_mov_b32_e32 v7, s4
+; SI-SDAG-NEXT:    v_fma_f32 v4, v6, s6, -v2
+; SI-SDAG-NEXT:    v_ldexp_f32_e32 v7, s9, v7
+; SI-SDAG-NEXT:    v_fma_f32 v4, v6, s7, v4
+; SI-SDAG-NEXT:    v_log_f32_e32 v7, v7
+; SI-SDAG-NEXT:    v_add_f32_e32 v2, v2, v4
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v1, vcc
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v6|, s11
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s8, v0
+; SI-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
+; SI-SDAG-NEXT:    v_sub_f32_e32 v2, v2, v5
+; SI-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3f317217, v7
+; SI-SDAG-NEXT:    s_cselect_b32 s4, 32, 0
+; SI-SDAG-NEXT:    v_fma_f32 v6, v7, s6, -v5
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; SI-SDAG-NEXT:    v_mov_b32_e32 v1, s4
+; SI-SDAG-NEXT:    v_fma_f32 v6, v7, s7, v6
+; SI-SDAG-NEXT:    v_ldexp_f32_e32 v1, s8, v1
 ; SI-SDAG-NEXT:    v_add_f32_e32 v5, v5, v6
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s13
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[0:1]
-; SI-SDAG-NEXT:    s_mov_b32 s6, -1
-; SI-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v4
-; SI-SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
+; SI-SDAG-NEXT:    v_log_f32_e32 v6, v1
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v7|, s11
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v7, v5, vcc
+; SI-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v4
+; SI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3f317217, v6
+; SI-SDAG-NEXT:    v_fma_f32 v5, v6, s6, -v4
+; SI-SDAG-NEXT:    v_fma_f32 v5, v6, s7, v5
+; SI-SDAG-NEXT:    v_add_f32_e32 v4, v4, v5
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v6|, s11
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
+; SI-SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; SI-SDAG-NEXT:    s_mov_b32 s2, -1
+; SI-SDAG-NEXT:    v_sub_f32_e32 v0, v4, v0
+; SI-SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; SI-SDAG-NEXT:    s_endpgm
 ;
 ; SI-GISEL-LABEL: s_log_v4f32:
@@ -1520,84 +1533,88 @@ define amdgpu_kernel void @s_log_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ;
 ; VI-SDAG-LABEL: s_log_v4f32:
 ; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x34
+; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
-; VI-SDAG-NEXT:    s_mov_b32 s6, 0x7f800000
+; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x41b17218
+; VI-SDAG-NEXT:    s_mov_b32 s8, 0x7f800000
 ; VI-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x24
 ; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s11, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; VI-SDAG-NEXT:    v_ldexp_f32 v1, s11, v1
-; VI-SDAG-NEXT:    v_log_f32_e32 v1, v1
-; VI-SDAG-NEXT:    v_and_b32_e32 v2, 0xfffff000, v1
-; VI-SDAG-NEXT:    v_sub_f32_e32 v3, v1, v2
-; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3805fdf4, v2
-; VI-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3f317000, v3
-; VI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3805fdf4, v3
-; VI-SDAG-NEXT:    v_add_f32_e32 v3, v4, v3
-; VI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3f317000, v2
-; VI-SDAG-NEXT:    v_add_f32_e32 v3, v5, v3
-; VI-SDAG-NEXT:    v_add_f32_e32 v2, v2, v3
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v1|, s6
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[0:1]
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], s10, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[0:1]
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
-; VI-SDAG-NEXT:    v_ldexp_f32 v2, s10, v2
-; VI-SDAG-NEXT:    v_log_f32_e32 v2, v2
-; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x41b17218
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v3, 0, v4, vcc
-; VI-SDAG-NEXT:    v_sub_f32_e32 v3, v1, v3
-; VI-SDAG-NEXT:    v_and_b32_e32 v1, 0xfffff000, v2
-; VI-SDAG-NEXT:    v_sub_f32_e32 v5, v2, v1
-; VI-SDAG-NEXT:    v_mul_f32_e32 v6, 0x3f317000, v5
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v0
+; VI-SDAG-NEXT:    s_and_b64 s[6:7], vcc, exec
+; VI-SDAG-NEXT:    s_cselect_b32 s6, 32, 0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s6
+; VI-SDAG-NEXT:    v_ldexp_f32 v3, s3, v3
+; VI-SDAG-NEXT:    v_log_f32_e32 v3, v3
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
+; VI-SDAG-NEXT:    s_and_b64 s[6:7], vcc, exec
+; VI-SDAG-NEXT:    v_and_b32_e32 v4, 0xfffff000, v3
+; VI-SDAG-NEXT:    v_sub_f32_e32 v5, v3, v4
+; VI-SDAG-NEXT:    v_mul_f32_e32 v6, 0x3805fdf4, v4
+; VI-SDAG-NEXT:    v_mul_f32_e32 v7, 0x3f317000, v5
 ; VI-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3805fdf4, v5
-; VI-SDAG-NEXT:    v_mul_f32_e32 v7, 0x3805fdf4, v1
-; VI-SDAG-NEXT:    v_add_f32_e32 v5, v7, v5
+; VI-SDAG-NEXT:    s_cselect_b32 s3, 32, 0
 ; VI-SDAG-NEXT:    v_add_f32_e32 v5, v6, v5
-; VI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3f317000, v1
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s9, v0
-; VI-SDAG-NEXT:    v_add_f32_e32 v1, v1, v5
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v5, 5, v5
-; VI-SDAG-NEXT:    v_ldexp_f32 v5, s9, v5
-; VI-SDAG-NEXT:    v_log_f32_e32 v5, v5
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[2:3], |v2|, s6
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v2, v1, s[2:3]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, v4, s[0:1]
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], s8, v0
-; VI-SDAG-NEXT:    v_sub_f32_e32 v2, v1, v2
-; VI-SDAG-NEXT:    v_and_b32_e32 v1, 0xfffff000, v5
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
-; VI-SDAG-NEXT:    v_sub_f32_e32 v6, v5, v1
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
-; VI-SDAG-NEXT:    v_mul_f32_e32 v7, 0x3f317000, v6
-; VI-SDAG-NEXT:    v_mul_f32_e32 v6, 0x3805fdf4, v6
-; VI-SDAG-NEXT:    v_mul_f32_e32 v8, 0x3805fdf4, v1
-; VI-SDAG-NEXT:    v_ldexp_f32 v0, s8, v0
-; VI-SDAG-NEXT:    v_add_f32_e32 v6, v8, v6
-; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
-; VI-SDAG-NEXT:    v_add_f32_e32 v6, v7, v6
-; VI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3f317000, v1
-; VI-SDAG-NEXT:    v_add_f32_e32 v1, v1, v6
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[2:3], |v5|, s6
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v5, v1, s[2:3]
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v4, vcc
-; VI-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v5
-; VI-SDAG-NEXT:    v_and_b32_e32 v5, 0xfffff000, v0
-; VI-SDAG-NEXT:    v_sub_f32_e32 v6, v0, v5
-; VI-SDAG-NEXT:    v_mul_f32_e32 v7, 0x3f317000, v6
+; VI-SDAG-NEXT:    v_mov_b32_e32 v6, s3
+; VI-SDAG-NEXT:    v_ldexp_f32 v6, s2, v6
+; VI-SDAG-NEXT:    v_log_f32_e32 v6, v6
+; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3f317000, v4
+; VI-SDAG-NEXT:    v_add_f32_e32 v5, v7, v5
+; VI-SDAG-NEXT:    v_add_f32_e32 v4, v4, v5
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v1, vcc
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v3|, s8
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
+; VI-SDAG-NEXT:    v_sub_f32_e32 v3, v3, v2
+; VI-SDAG-NEXT:    v_and_b32_e32 v2, 0xfffff000, v6
+; VI-SDAG-NEXT:    v_sub_f32_e32 v4, v6, v2
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v0
+; VI-SDAG-NEXT:    v_mul_f32_e32 v7, 0x3f317000, v4
+; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3805fdf4, v4
+; VI-SDAG-NEXT:    v_mul_f32_e32 v8, 0x3805fdf4, v2
+; VI-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; VI-SDAG-NEXT:    v_add_f32_e32 v4, v8, v4
+; VI-SDAG-NEXT:    s_cselect_b32 s2, 32, 0
+; VI-SDAG-NEXT:    v_add_f32_e32 v4, v7, v4
+; VI-SDAG-NEXT:    v_mov_b32_e32 v7, s2
+; VI-SDAG-NEXT:    v_ldexp_f32 v7, s1, v7
+; VI-SDAG-NEXT:    v_log_f32_e32 v7, v7
+; VI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3f317000, v2
+; VI-SDAG-NEXT:    v_add_f32_e32 v2, v2, v4
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v1, vcc
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v6|, s8
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
+; VI-SDAG-NEXT:    v_sub_f32_e32 v2, v2, v5
+; VI-SDAG-NEXT:    v_and_b32_e32 v5, 0xfffff000, v7
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v0
+; VI-SDAG-NEXT:    v_sub_f32_e32 v6, v7, v5
+; VI-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; VI-SDAG-NEXT:    v_mul_f32_e32 v8, 0x3f317000, v6
 ; VI-SDAG-NEXT:    v_mul_f32_e32 v6, 0x3805fdf4, v6
-; VI-SDAG-NEXT:    v_mul_f32_e32 v8, 0x3805fdf4, v5
+; VI-SDAG-NEXT:    v_mul_f32_e32 v9, 0x3805fdf4, v5
+; VI-SDAG-NEXT:    s_cselect_b32 s1, 32, 0
+; VI-SDAG-NEXT:    v_add_f32_e32 v6, v9, v6
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-SDAG-NEXT:    v_add_f32_e32 v6, v8, v6
-; VI-SDAG-NEXT:    v_add_f32_e32 v6, v7, v6
 ; VI-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3f317000, v5
+; VI-SDAG-NEXT:    v_ldexp_f32 v1, s0, v1
 ; VI-SDAG-NEXT:    v_add_f32_e32 v5, v5, v6
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s6
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[0:1]
-; VI-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v4
+; VI-SDAG-NEXT:    v_log_f32_e32 v6, v1
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v7|, s8
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v7, v5, vcc
+; VI-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v4
+; VI-SDAG-NEXT:    v_and_b32_e32 v4, 0xfffff000, v6
+; VI-SDAG-NEXT:    v_sub_f32_e32 v5, v6, v4
+; VI-SDAG-NEXT:    v_mul_f32_e32 v7, 0x3f317000, v5
+; VI-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3805fdf4, v5
+; VI-SDAG-NEXT:    v_mul_f32_e32 v8, 0x3805fdf4, v4
+; VI-SDAG-NEXT:    v_add_f32_e32 v5, v8, v5
+; VI-SDAG-NEXT:    v_add_f32_e32 v5, v7, v5
+; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3f317000, v4
+; VI-SDAG-NEXT:    v_add_f32_e32 v4, v4, v5
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v6|, s8
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
+; VI-SDAG-NEXT:    v_sub_f32_e32 v0, v4, v0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v4, s4
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s5
 ; VI-SDAG-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
@@ -1690,67 +1707,71 @@ define amdgpu_kernel void @s_log_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ;
 ; GFX900-SDAG-LABEL: s_log_v4f32:
 ; GFX900-SDAG:       ; %bb.0:
-; GFX900-SDAG-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x34
+; GFX900-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
 ; GFX900-SDAG-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x24
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
-; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x3f317217
-; GFX900-SDAG-NEXT:    s_mov_b32 s5, 0x3377d1cf
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x41b17218
+; GFX900-SDAG-NEXT:    s_mov_b32 s8, 0x3f317217
 ; GFX900-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s11, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; GFX900-SDAG-NEXT:    v_ldexp_f32 v1, s11, v1
-; GFX900-SDAG-NEXT:    v_log_f32_e32 v1, v1
-; GFX900-SDAG-NEXT:    s_mov_b32 s11, 0x7f800000
-; GFX900-SDAG-NEXT:    v_mov_b32_e32 v5, 0x41b17218
-; GFX900-SDAG-NEXT:    v_mov_b32_e32 v4, 0
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3f317217, v1
-; GFX900-SDAG-NEXT:    v_fma_f32 v3, v1, s4, -v2
-; GFX900-SDAG-NEXT:    v_fma_f32 v3, v1, s5, v3
-; GFX900-SDAG-NEXT:    v_add_f32_e32 v2, v2, v3
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v1|, s11
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[0:1]
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], s10, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[0:1]
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
-; GFX900-SDAG-NEXT:    v_ldexp_f32 v2, s10, v2
-; GFX900-SDAG-NEXT:    v_log_f32_e32 v2, v2
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
-; GFX900-SDAG-NEXT:    v_sub_f32_e32 v3, v1, v3
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s9, v0
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v2
-; GFX900-SDAG-NEXT:    v_fma_f32 v6, v2, s4, -v1
-; GFX900-SDAG-NEXT:    v_fma_f32 v6, v2, s5, v6
-; GFX900-SDAG-NEXT:    v_add_f32_e32 v1, v1, v6
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 5, v6
-; GFX900-SDAG-NEXT:    v_ldexp_f32 v6, s9, v6
-; GFX900-SDAG-NEXT:    v_log_f32_e32 v6, v6
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 s[2:3], |v2|, s11
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, v2, v1, s[2:3]
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, v5, s[0:1]
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], s8, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
-; GFX900-SDAG-NEXT:    v_sub_f32_e32 v2, v1, v2
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v6
-; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, s8, v0
-; GFX900-SDAG-NEXT:    v_fma_f32 v7, v6, s4, -v1
-; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
-; GFX900-SDAG-NEXT:    v_fma_f32 v7, v6, s5, v7
-; GFX900-SDAG-NEXT:    v_add_f32_e32 v1, v1, v7
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 s[2:3], |v6|, s11
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, v6, v1, s[2:3]
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v6, 0, v5, vcc
-; GFX900-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v6
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v6, 0x3f317217, v0
-; GFX900-SDAG-NEXT:    v_fma_f32 v7, v0, s4, -v6
-; GFX900-SDAG-NEXT:    v_fma_f32 v7, v0, s5, v7
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v0
+; GFX900-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
+; GFX900-SDAG-NEXT:    s_cselect_b32 s4, 32, 0
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v3, s4
+; GFX900-SDAG-NEXT:    v_ldexp_f32 v3, s3, v3
+; GFX900-SDAG-NEXT:    v_log_f32_e32 v3, v3
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
+; GFX900-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3f317217, v3
+; GFX900-SDAG-NEXT:    s_cselect_b32 s3, 32, 0
+; GFX900-SDAG-NEXT:    s_mov_b32 s9, 0x3377d1cf
+; GFX900-SDAG-NEXT:    v_fma_f32 v6, v3, s8, -v5
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v7, s3
+; GFX900-SDAG-NEXT:    v_fma_f32 v6, v3, s9, v6
+; GFX900-SDAG-NEXT:    s_mov_b32 s10, 0x7f800000
+; GFX900-SDAG-NEXT:    v_ldexp_f32 v7, s2, v7
+; GFX900-SDAG-NEXT:    v_add_f32_e32 v5, v5, v6
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v6, 0, v1, vcc
+; GFX900-SDAG-NEXT:    v_log_f32_e32 v7, v7
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v3|, s10
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v0
+; GFX900-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; GFX900-SDAG-NEXT:    s_cselect_b32 s2, 32, 0
+; GFX900-SDAG-NEXT:    v_sub_f32_e32 v3, v3, v2
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3f317217, v7
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v8, s2
+; GFX900-SDAG-NEXT:    v_fma_f32 v5, v7, s8, -v2
+; GFX900-SDAG-NEXT:    v_ldexp_f32 v8, s1, v8
+; GFX900-SDAG-NEXT:    v_fma_f32 v5, v7, s9, v5
+; GFX900-SDAG-NEXT:    v_log_f32_e32 v8, v8
+; GFX900-SDAG-NEXT:    v_add_f32_e32 v2, v2, v5
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v1, vcc
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v7|, s10
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v2, v7, v2, vcc
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v0
+; GFX900-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; GFX900-SDAG-NEXT:    v_sub_f32_e32 v2, v2, v6
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v6, 0x3f317217, v8
+; GFX900-SDAG-NEXT:    s_cselect_b32 s1, 32, 0
+; GFX900-SDAG-NEXT:    v_fma_f32 v7, v8, s8, -v6
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX900-SDAG-NEXT:    v_fma_f32 v7, v8, s9, v7
+; GFX900-SDAG-NEXT:    v_ldexp_f32 v1, s0, v1
 ; GFX900-SDAG-NEXT:    v_add_f32_e32 v6, v6, v7
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s11
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v5, 0, v5, s[0:1]
-; GFX900-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v5
+; GFX900-SDAG-NEXT:    v_log_f32_e32 v7, v1
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v8|, s10
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v1, v8, v6, vcc
+; GFX900-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v5
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3f317217, v7
+; GFX900-SDAG-NEXT:    v_fma_f32 v6, v7, s8, -v5
+; GFX900-SDAG-NEXT:    v_fma_f32 v6, v7, s9, v6
+; GFX900-SDAG-NEXT:    v_add_f32_e32 v5, v5, v6
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v7|, s10
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v5, v7, v5, vcc
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v4, 0
+; GFX900-SDAG-NEXT:    v_sub_f32_e32 v0, v5, v0
 ; GFX900-SDAG-NEXT:    global_store_dwordx4 v4, v[0:3], s[6:7]
 ; GFX900-SDAG-NEXT:    s_endpgm
 ;
@@ -1824,68 +1845,61 @@ define amdgpu_kernel void @s_log_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x34
 ; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s8, 0x800000, s1
-; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s9, 0x800000, s0
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s6, 0x800000, s3
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s7, 0x800000, s2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s8
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s9
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s6
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s7
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 0x41b17218, s6
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v3, 5, v3
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v9, 0, 0x41b17218, s7
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v14, 0, 0x41b17218, s8
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v15, 0, 0x41b17218, s9
-; GFX1100-SDAG-NEXT:    v_ldexp_f32 v2, s1, v2
-; GFX1100-SDAG-NEXT:    v_ldexp_f32 v3, s0, v3
-; GFX1100-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s8, 0x800000, s1
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s9, 0x800000, s0
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4)
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 0x41b17218, s6
+; GFX1100-SDAG-NEXT:    s_and_b32 s6, s6, exec_lo
+; GFX1100-SDAG-NEXT:    s_cselect_b32 s6, 32, 0
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x41b17218, s7
+; GFX1100-SDAG-NEXT:    s_and_b32 s7, s7, exec_lo
+; GFX1100-SDAG-NEXT:    v_ldexp_f32 v2, s3, s6
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 0x41b17218, s8
+; GFX1100-SDAG-NEXT:    s_cselect_b32 s7, 32, 0
+; GFX1100-SDAG-NEXT:    s_and_b32 s8, s8, exec_lo
+; GFX1100-SDAG-NEXT:    s_cselect_b32 s3, 32, 0
+; GFX1100-SDAG-NEXT:    s_and_b32 s6, s9, exec_lo
+; GFX1100-SDAG-NEXT:    v_ldexp_f32 v3, s2, s7
+; GFX1100-SDAG-NEXT:    s_cselect_b32 s2, 32, 0
+; GFX1100-SDAG-NEXT:    v_ldexp_f32 v6, s1, s3
+; GFX1100-SDAG-NEXT:    v_ldexp_f32 v7, s0, s2
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v2, v2
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v3, v3
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1100-SDAG-NEXT:    v_log_f32_e32 v6, v6
+; GFX1100-SDAG-NEXT:    v_log_f32_e32 v7, v7
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v5, 0, 0x41b17218, s9
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(TRANS32_DEP_3)
+; GFX1100-SDAG-NEXT:    v_dual_mul_f32 v8, 0x3f317217, v2 :: v_dual_mul_f32 v9, 0x3f317217, v3
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2|
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v7, 0x3f317217, v2
-; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, s3, v0
-; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v8, 0x3f317217, v3
-; GFX1100-SDAG-NEXT:    v_ldexp_f32 v1, s2, v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1100-SDAG-NEXT:    v_fma_f32 v12, 0x3f317217, v2, -v7
-; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1100-SDAG-NEXT:    v_fma_f32 v13, 0x3f317217, v3, -v8
-; GFX1100-SDAG-NEXT:    v_log_f32_e32 v1, v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_dual_mul_f32 v10, 0x3f317217, v6 :: v_dual_mul_f32 v11, 0x3f317217, v7
+; GFX1100-SDAG-NEXT:    v_fma_f32 v12, 0x3f317217, v2, -v8
+; GFX1100-SDAG-NEXT:    v_fma_f32 v13, 0x3f317217, v3, -v9
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1100-SDAG-NEXT:    v_fma_f32 v14, 0x3f317217, v6, -v10
+; GFX1100-SDAG-NEXT:    v_fma_f32 v15, 0x3f317217, v7, -v11
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX1100-SDAG-NEXT:    v_dual_fmac_f32 v12, 0x3377d1cf, v2 :: v_dual_fmac_f32 v13, 0x3377d1cf, v3
-; GFX1100-SDAG-NEXT:    v_add_f32_e32 v7, v7, v12
-; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_dual_mul_f32 v5, 0x3f317217, v0 :: v_dual_add_f32 v8, v8, v13
-; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v6, 0x3f317217, v1
-; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1100-SDAG-NEXT:    v_fma_f32 v10, 0x3f317217, v0, -v5
-; GFX1100-SDAG-NEXT:    v_fma_f32 v11, 0x3f317217, v1, -v6
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_dual_fmac_f32 v10, 0x3377d1cf, v0 :: v_dual_fmac_f32 v11, 0x3377d1cf, v1
-; GFX1100-SDAG-NEXT:    v_dual_add_f32 v5, v5, v10 :: v_dual_add_f32 v6, v6, v11
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
-; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1|
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc_lo
-; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2|
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v5, v2, v7, vcc_lo
+; GFX1100-SDAG-NEXT:    v_dual_fmac_f32 v14, 0x3377d1cf, v6 :: v_dual_fmac_f32 v15, 0x3377d1cf, v7
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT:    v_dual_add_f32 v8, v8, v12 :: v_dual_add_f32 v9, v9, v13
+; GFX1100-SDAG-NEXT:    v_dual_add_f32 v10, v10, v14 :: v_dual_add_f32 v11, v11, v15
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v3|
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX1100-SDAG-NEXT:    v_dual_mov_b32 v7, 0 :: v_dual_sub_f32 v2, v1, v9
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v6, v3, v8, vcc_lo
-; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v3, v0, v4
-; GFX1100-SDAG-NEXT:    v_dual_sub_f32 v1, v5, v14 :: v_dual_sub_f32 v0, v6, v15
+; GFX1100-SDAG-NEXT:    v_dual_cndmask_b32 v8, v3, v9 :: v_dual_mov_b32 v9, 0
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v6|
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v6, v6, v10, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v7|
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v7, v7, v11, vcc_lo
+; GFX1100-SDAG-NEXT:    v_dual_sub_f32 v3, v2, v0 :: v_dual_sub_f32 v2, v8, v1
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX1100-SDAG-NEXT:    v_dual_sub_f32 v1, v6, v4 :: v_dual_sub_f32 v0, v7, v5
 ; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1100-SDAG-NEXT:    global_store_b128 v7, v[0:3], s[0:1]
+; GFX1100-SDAG-NEXT:    global_store_b128 v9, v[0:3], s[0:1]
 ; GFX1100-SDAG-NEXT:    s_endpgm
 ;
 ; GFX1100-GISEL-LABEL: s_log_v4f32:
@@ -2143,8 +2157,7 @@ define float @v_log_f32(float %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x3f317217
@@ -2189,8 +2202,7 @@ define float @v_log_f32(float %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x7f800000
@@ -2241,8 +2253,7 @@ define float @v_log_f32(float %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x3f317217
@@ -2286,22 +2297,21 @@ define float @v_log_f32(float %in) {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v0
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2347,8 +2357,7 @@ define float @v_log_fabs_f32(float %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e64 v0, |v0|, v1
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x3f317217
@@ -2393,8 +2402,7 @@ define float @v_log_fabs_f32(float %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, |v0|, v1
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x7f800000
@@ -2445,8 +2453,7 @@ define float @v_log_fabs_f32(float %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, |v0|, v1
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x3f317217
@@ -2491,22 +2498,20 @@ define float @v_log_fabs_f32(float %in) {
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s0
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, s0
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, |v0|, v1
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v0
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x41b17218, s0
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2554,8 +2559,7 @@ define float @v_log_fneg_fabs_f32(float %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x80800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc, |v0|, s4
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e64 v0, -|v0|, v1
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x3f317217
@@ -2600,8 +2604,7 @@ define float @v_log_fneg_fabs_f32(float %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x80800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc, |v0|, s4
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, -|v0|, v1
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x7f800000
@@ -2652,8 +2655,7 @@ define float @v_log_fneg_fabs_f32(float %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x80800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc, |v0|, s4
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, -|v0|, v1
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x3f317217
@@ -2698,22 +2700,20 @@ define float @v_log_fneg_fabs_f32(float %in) {
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_lt_f32_e64 s0, 0x80800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s0
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, s0
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, -|v0|, v1
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v0
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x41b17218, s0
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2762,8 +2762,7 @@ define float @v_log_fneg_f32(float %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x80800000
 ; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e64 v0, -v0, v1
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x3f317217
@@ -2808,8 +2807,7 @@ define float @v_log_fneg_f32(float %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x80800000
 ; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, -v0, v1
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x7f800000
@@ -2860,8 +2858,7 @@ define float @v_log_fneg_f32(float %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x80800000
 ; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, -v0, v1
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x3f317217
@@ -2905,22 +2902,21 @@ define float @v_log_fneg_f32(float %in) {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc_lo, 0x80800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, -v0, v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v0
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2968,8 +2964,7 @@ define float @v_log_f32_fast(float %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v2
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc1b17218
@@ -2995,8 +2990,7 @@ define float @v_log_f32_fast(float %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc1b17218
@@ -3022,8 +3016,7 @@ define float @v_log_f32_fast(float %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc1b17218
@@ -3048,12 +3041,10 @@ define float @v_log_f32_fast(float %in) {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0xc1b17218, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3f317218, v1
@@ -3088,8 +3079,7 @@ define float @v_log_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v2
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc1b17218
@@ -3115,8 +3105,7 @@ define float @v_log_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc1b17218
@@ -3142,8 +3131,7 @@ define float @v_log_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc1b17218
@@ -3168,12 +3156,10 @@ define float @v_log_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0xc1b17218, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3f317218, v1
@@ -3208,8 +3194,7 @@ define float @v_log_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v2
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc1b17218
@@ -3235,8 +3220,7 @@ define float @v_log_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc1b17218
@@ -3262,8 +3246,7 @@ define float @v_log_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc1b17218
@@ -3288,12 +3271,10 @@ define float @v_log_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0xc1b17218, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3f317218, v1
@@ -3328,8 +3309,7 @@ define float @v_log_f32_ninf(float %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x3f317217
@@ -3374,8 +3354,7 @@ define float @v_log_f32_ninf(float %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x7f800000
@@ -3426,8 +3405,7 @@ define float @v_log_f32_ninf(float %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x3f317217
@@ -3471,22 +3449,21 @@ define float @v_log_f32_ninf(float %in) {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v0
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -3532,8 +3509,7 @@ define float @v_log_f32_afn(float %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v2
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc1b17218
@@ -3559,8 +3535,7 @@ define float @v_log_f32_afn(float %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc1b17218
@@ -3586,8 +3561,7 @@ define float @v_log_f32_afn(float %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc1b17218
@@ -3612,12 +3586,10 @@ define float @v_log_f32_afn(float %in) {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0xc1b17218, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3f317218, v1
@@ -3681,8 +3653,7 @@ define float @v_log_f32_afn_dynamic(float %in) #1 {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v2
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc1b17218
@@ -3708,8 +3679,7 @@ define float @v_log_f32_afn_dynamic(float %in) #1 {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc1b17218
@@ -3735,8 +3705,7 @@ define float @v_log_f32_afn_dynamic(float %in) #1 {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc1b17218
@@ -3761,12 +3730,10 @@ define float @v_log_f32_afn_dynamic(float %in) #1 {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0xc1b17218, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3f317218, v1
@@ -3801,8 +3768,7 @@ define float @v_fabs_log_f32_afn(float %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e64 v0, |v0|, v2
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc1b17218
@@ -3828,8 +3794,7 @@ define float @v_fabs_log_f32_afn(float %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, |v0|, v2
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc1b17218
@@ -3855,8 +3820,7 @@ define float @v_fabs_log_f32_afn(float %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, |v0|, v2
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc1b17218
@@ -3882,11 +3846,10 @@ define float @v_fabs_log_f32_afn(float %in) {
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, s0
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0xc1b17218, s0
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, |v0|, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3f317218, v1
@@ -4063,8 +4026,7 @@ define float @v_log_f32_nnan(float %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x3f317217
@@ -4109,8 +4071,7 @@ define float @v_log_f32_nnan(float %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x7f800000
@@ -4161,8 +4122,7 @@ define float @v_log_f32_nnan(float %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x3f317217
@@ -4206,22 +4166,21 @@ define float @v_log_f32_nnan(float %in) {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v0
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4407,8 +4366,7 @@ define float @v_log_f32_nnan_dynamic(float %in) #1 {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x3f317217
@@ -4453,8 +4411,7 @@ define float @v_log_f32_nnan_dynamic(float %in) #1 {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x7f800000
@@ -4505,8 +4462,7 @@ define float @v_log_f32_nnan_dynamic(float %in) #1 {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x3f317217
@@ -4550,22 +4506,21 @@ define float @v_log_f32_nnan_dynamic(float %in) #1 {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v0
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4751,8 +4706,7 @@ define float @v_log_f32_ninf_dynamic(float %in) #1 {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x3f317217
@@ -4797,8 +4751,7 @@ define float @v_log_f32_ninf_dynamic(float %in) #1 {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x7f800000
@@ -4849,8 +4802,7 @@ define float @v_log_f32_ninf_dynamic(float %in) #1 {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x3f317217
@@ -4894,22 +4846,21 @@ define float @v_log_f32_ninf_dynamic(float %in) #1 {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v0
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4955,8 +4906,7 @@ define float @v_log_f32_nnan_ninf(float %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x3f317217
@@ -4995,8 +4945,7 @@ define float @v_log_f32_nnan_ninf(float %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_and_b32_e32 v1, 0xfffff000, v0
@@ -5041,8 +4990,7 @@ define float @v_log_f32_nnan_ninf(float %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x3f317217
@@ -5080,20 +5028,18 @@ define float @v_log_f32_nnan_ninf(float %in) {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v0
-; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
 ; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3377d1cf, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v0, v1, v0
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -5240,8 +5186,7 @@ define float @v_log_f32_nnan_ninf_dynamic(float %in) #1 {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x3f317217
@@ -5280,8 +5225,7 @@ define float @v_log_f32_nnan_ninf_dynamic(float %in) #1 {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_and_b32_e32 v1, 0xfffff000, v0
@@ -5326,8 +5270,7 @@ define float @v_log_f32_nnan_ninf_dynamic(float %in) #1 {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x3f317217
@@ -5365,20 +5308,18 @@ define float @v_log_f32_nnan_ninf_dynamic(float %in) #1 {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v0
-; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
 ; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3377d1cf, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v0, v1, v0
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -5451,8 +5392,7 @@ define float @v_log_f32_dynamic_mode(float %in) #1 {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x3f317217
@@ -5497,8 +5437,7 @@ define float @v_log_f32_dynamic_mode(float %in) #1 {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x7f800000
@@ -5549,8 +5488,7 @@ define float @v_log_f32_dynamic_mode(float %in) #1 {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x3f317217
@@ -5594,22 +5532,21 @@ define float @v_log_f32_dynamic_mode(float %in) #1 {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v0
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -6132,8 +6069,7 @@ define float @v_log_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
 ; SI-SDAG-NEXT:    s_mov_b32 s5, 0x3377d1cf
 ; SI-SDAG-NEXT:    v_add_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x3f317217
@@ -6302,8 +6238,7 @@ define float @v_log_f32_from_fpext_bf16(bfloat %src) {
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SI-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SI-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-NEXT:    v_log_f32_e32 v0, v0
 ; SI-NEXT:    s_mov_b32 s4, 0x3f317217
@@ -6326,8 +6261,7 @@ define float @v_log_f32_from_fpext_bf16(bfloat %src) {
 ; VI-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; VI-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; VI-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; VI-NEXT:    v_ldexp_f32 v0, v0, v1
 ; VI-NEXT:    v_log_f32_e32 v0, v0
 ; VI-NEXT:    s_mov_b32 s4, 0x7f800000
@@ -6353,8 +6287,7 @@ define float @v_log_f32_from_fpext_bf16(bfloat %src) {
 ; GFX900-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX900-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; GFX900-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX900-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-NEXT:    s_mov_b32 s4, 0x3f317217
@@ -6377,22 +6310,20 @@ define float @v_log_f32_from_fpext_bf16(bfloat %src) {
 ; GFX1100-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1100-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
-; GFX1100-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc_lo
 ; GFX1100-NEXT:    v_ldexp_f32 v0, v0, v1
+; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
 ; GFX1100-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v0
 ; GFX1100-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
-; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
 ; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
 ; GFX1100-NEXT:    v_add_f32_e32 v1, v1, v2
+; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1100-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
 ; GFX1100-NEXT:    v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
-; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-NEXT:    v_sub_f32_e32 v0, v0, v1
 ; GFX1100-NEXT:    s_setpc_b64 s[30:31]
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
index d09df75837339..d7cefd6ed12ec 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
@@ -14,30 +14,31 @@
 define amdgpu_kernel void @s_log10_f32(ptr addrspace(1) %out, float %in) {
 ; SI-SDAG-LABEL: s_log10_f32:
 ; SI-SDAG:       ; %bb.0:
-; SI-SDAG-NEXT:    s_load_dword s0, s[4:5], 0xb
-; SI-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9
+; SI-SDAG-NEXT:    s_load_dword s6, s[4:5], 0xb
+; SI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
-; SI-SDAG-NEXT:    s_mov_b32 s1, 0x3284fbcf
-; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
-; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
-; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, s0, v0
-; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
-; SI-SDAG-NEXT:    s_mov_b32 s0, 0x3e9a209a
-; SI-SDAG-NEXT:    s_mov_b32 s6, -1
-; SI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3e9a209a, v0
-; SI-SDAG-NEXT:    v_fma_f32 v2, v0, s0, -v1
-; SI-SDAG-NEXT:    v_fma_f32 v2, v0, s1, v2
-; SI-SDAG-NEXT:    s_mov_b32 s0, 0x7f800000
-; SI-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v0|, s0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s[0:1]
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x411a209b
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
-; SI-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
-; SI-SDAG-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; SI-SDAG-NEXT:    s_mov_b32 s4, 0x3e9a209a
+; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v0
+; SI-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; SI-SDAG-NEXT:    s_cselect_b32 s2, 32, 0
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; SI-SDAG-NEXT:    v_mov_b32_e32 v1, s2
+; SI-SDAG-NEXT:    v_ldexp_f32_e32 v1, s6, v1
+; SI-SDAG-NEXT:    v_log_f32_e32 v1, v1
+; SI-SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; SI-SDAG-NEXT:    s_mov_b32 s2, -1
+; SI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3e9a209a, v1
+; SI-SDAG-NEXT:    v_fma_f32 v3, v1, s4, -v2
+; SI-SDAG-NEXT:    s_mov_b32 s4, 0x3284fbcf
+; SI-SDAG-NEXT:    v_fma_f32 v3, v1, s4, v3
+; SI-SDAG-NEXT:    s_mov_b32 s4, 0x7f800000
+; SI-SDAG-NEXT:    v_add_f32_e32 v2, v2, v3
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, s4
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; SI-SDAG-NEXT:    v_sub_f32_e32 v0, v1, v0
+; SI-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-SDAG-NEXT:    s_endpgm
 ;
 ; SI-GISEL-LABEL: s_log10_f32:
@@ -70,32 +71,34 @@ define amdgpu_kernel void @s_log10_f32(ptr addrspace(1) %out, float %in) {
 ;
 ; VI-SDAG-LABEL: s_log10_f32:
 ; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x2c
-; VI-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
+; VI-SDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
-; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
-; VI-SDAG-NEXT:    v_ldexp_f32 v0, s0, v0
-; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
-; VI-SDAG-NEXT:    s_mov_b32 s0, 0x7f800000
-; VI-SDAG-NEXT:    v_and_b32_e32 v1, 0xfffff000, v0
-; VI-SDAG-NEXT:    v_sub_f32_e32 v2, v0, v1
-; VI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x369a84fb, v1
-; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3e9a2000, v2
-; VI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x369a84fb, v2
-; VI-SDAG-NEXT:    v_add_f32_e32 v2, v3, v2
-; VI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3e9a2000, v1
-; VI-SDAG-NEXT:    v_add_f32_e32 v2, v4, v2
-; VI-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v0|, s0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s[0:1]
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x411a209b
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
-; VI-SDAG-NEXT:    v_sub_f32_e32 v2, v0, v1
-; VI-SDAG-NEXT:    v_mov_b32_e32 v0, s2
-; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
+; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
+; VI-SDAG-NEXT:    s_and_b64 s[0:1], vcc, exec
+; VI-SDAG-NEXT:    s_cselect_b32 s0, 32, 0
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s0
+; VI-SDAG-NEXT:    v_ldexp_f32 v1, s2, v1
+; VI-SDAG-NEXT:    v_log_f32_e32 v1, v1
+; VI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-SDAG-NEXT:    s_mov_b32 s2, 0x7f800000
+; VI-SDAG-NEXT:    v_and_b32_e32 v2, 0xfffff000, v1
+; VI-SDAG-NEXT:    v_sub_f32_e32 v3, v1, v2
+; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x369a84fb, v2
+; VI-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3e9a2000, v3
+; VI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x369a84fb, v3
+; VI-SDAG-NEXT:    v_add_f32_e32 v3, v4, v3
+; VI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3e9a2000, v2
+; VI-SDAG-NEXT:    v_add_f32_e32 v3, v5, v3
+; VI-SDAG-NEXT:    v_add_f32_e32 v2, v2, v3
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, s2
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; VI-SDAG-NEXT:    v_sub_f32_e32 v2, v1, v0
+; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
 ; VI-SDAG-NEXT:    s_endpgm
 ;
@@ -132,29 +135,30 @@ define amdgpu_kernel void @s_log10_f32(ptr addrspace(1) %out, float %in) {
 ;
 ; GFX900-SDAG-LABEL: s_log10_f32:
 ; GFX900-SDAG:       ; %bb.0:
-; GFX900-SDAG-NEXT:    s_load_dword s0, s[4:5], 0x2c
-; GFX900-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
-; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x800000
-; GFX900-SDAG-NEXT:    s_mov_b32 s1, 0x3284fbcf
-; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x7f800000
+; GFX900-SDAG-NEXT:    s_load_dword s6, s[4:5], 0x2c
+; GFX900-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x411a209b
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX900-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v1
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; GFX900-SDAG-NEXT:    v_ldexp_f32 v1, s0, v1
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v0
+; GFX900-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; GFX900-SDAG-NEXT:    s_cselect_b32 s2, 32, 0
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, s2
+; GFX900-SDAG-NEXT:    v_ldexp_f32 v1, s6, v1
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v1, v1
-; GFX900-SDAG-NEXT:    s_mov_b32 s0, 0x3e9a209a
-; GFX900-SDAG-NEXT:    v_mov_b32_e32 v0, 0
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3e9a209a, v1
-; GFX900-SDAG-NEXT:    v_fma_f32 v3, v1, s0, -v2
-; GFX900-SDAG-NEXT:    v_fma_f32 v3, v1, s1, v3
-; GFX900-SDAG-NEXT:    v_add_f32_e32 v2, v2, v3
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v1|, s4
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[0:1]
-; GFX900-SDAG-NEXT:    v_mov_b32_e32 v2, 0x411a209b
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
-; GFX900-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v2
-; GFX900-SDAG-NEXT:    global_store_dword v0, v1, s[2:3]
+; GFX900-SDAG-NEXT:    s_mov_b32 s2, 0x3e9a209a
+; GFX900-SDAG-NEXT:    s_mov_b32 s3, 0x3284fbcf
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3e9a209a, v1
+; GFX900-SDAG-NEXT:    v_fma_f32 v4, v1, s2, -v3
+; GFX900-SDAG-NEXT:    v_fma_f32 v4, v1, s3, v4
+; GFX900-SDAG-NEXT:    s_mov_b32 s2, 0x7f800000
+; GFX900-SDAG-NEXT:    v_add_f32_e32 v3, v3, v4
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, s2
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX900-SDAG-NEXT:    v_sub_f32_e32 v0, v1, v0
+; GFX900-SDAG-NEXT:    global_store_dword v2, v0, s[0:1]
 ; GFX900-SDAG-NEXT:    s_endpgm
 ;
 ; GFX900-GISEL-LABEL: s_log10_f32:
@@ -188,26 +192,25 @@ define amdgpu_kernel void @s_log10_f32(ptr addrspace(1) %out, float %in) {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_load_b32 s0, s[4:5], 0x2c
 ; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s2, 0x800000, s0
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s2
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, s0, v0
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s1, 0x800000, s0
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 0x411a209b, s1
+; GFX1100-SDAG-NEXT:    s_and_b32 s1, s1, exec_lo
+; GFX1100-SDAG-NEXT:    s_cselect_b32 s1, 32, 0
+; GFX1100-SDAG-NEXT:    v_ldexp_f32 v1, s0, s1
 ; GFX1100-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT:    v_log_f32_e32 v1, v1
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3e9a209a, v0
-; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
+; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3e9a209a, v1
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1|
+; GFX1100-SDAG-NEXT:    v_fma_f32 v3, 0x3e9a209a, v1, -v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_dual_add_f32 v1, v1, v2 :: v_dual_mov_b32 v2, 0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x411a209b, s2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v3, v1, 0x3284fbcf, v3
+; GFX1100-SDAG-NEXT:    v_add_f32_e32 v2, v2, v3
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_dual_cndmask_b32 v1, v1, v2 :: v_dual_mov_b32 v2, 0
+; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v1, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
 ; GFX1100-SDAG-NEXT:    s_endpgm
@@ -316,44 +319,46 @@ define amdgpu_kernel void @s_log10_f32(ptr addrspace(1) %out, float %in) {
 define amdgpu_kernel void @s_log10_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
 ; SI-SDAG-LABEL: s_log10_v2f32:
 ; SI-SDAG:       ; %bb.0:
-; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x9
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
+; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x411a209b
 ; SI-SDAG-NEXT:    s_mov_b32 s8, 0x3284fbcf
 ; SI-SDAG-NEXT:    s_mov_b32 s9, 0x7f800000
-; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; SI-SDAG-NEXT:    v_ldexp_f32_e32 v1, s3, v1
-; SI-SDAG-NEXT:    v_log_f32_e32 v1, v1
-; SI-SDAG-NEXT:    s_mov_b32 s3, 0x3e9a209a
-; SI-SDAG-NEXT:    s_mov_b32 s4, s0
-; SI-SDAG-NEXT:    s_mov_b32 s5, s1
-; SI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3e9a209a, v1
-; SI-SDAG-NEXT:    v_fma_f32 v3, v1, s3, -v2
-; SI-SDAG-NEXT:    v_fma_f32 v3, v1, s8, v3
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s7, v0
+; SI-SDAG-NEXT:    s_and_b64 s[0:1], vcc, exec
+; SI-SDAG-NEXT:    s_cselect_b32 s0, 32, 0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v3, s0
+; SI-SDAG-NEXT:    v_ldexp_f32_e32 v3, s7, v3
+; SI-SDAG-NEXT:    v_log_f32_e32 v3, v3
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v0
+; SI-SDAG-NEXT:    s_mov_b32 s0, s4
+; SI-SDAG-NEXT:    s_mov_b32 s1, s5
+; SI-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
+; SI-SDAG-NEXT:    s_mov_b32 s7, 0x3e9a209a
+; SI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3e9a209a, v3
+; SI-SDAG-NEXT:    s_cselect_b32 s4, 32, 0
+; SI-SDAG-NEXT:    v_fma_f32 v5, v3, s7, -v4
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; SI-SDAG-NEXT:    v_mov_b32_e32 v1, s4
+; SI-SDAG-NEXT:    v_fma_f32 v5, v3, s8, v5
+; SI-SDAG-NEXT:    v_ldexp_f32_e32 v1, s6, v1
+; SI-SDAG-NEXT:    v_add_f32_e32 v4, v4, v5
+; SI-SDAG-NEXT:    v_log_f32_e32 v5, v1
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v3|, s9
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
+; SI-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3e9a209a, v5
+; SI-SDAG-NEXT:    v_fma_f32 v3, v5, s7, -v2
+; SI-SDAG-NEXT:    v_fma_f32 v3, v5, s8, v3
 ; SI-SDAG-NEXT:    v_add_f32_e32 v2, v2, v3
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v1|, s9
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[0:1]
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], s2, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
-; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, s2, v0
-; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
-; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x411a209b
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v3, 0, v2, vcc
-; SI-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v3
-; SI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3e9a209a, v0
-; SI-SDAG-NEXT:    v_fma_f32 v4, v0, s3, -v3
-; SI-SDAG-NEXT:    v_fma_f32 v4, v0, s8, v4
-; SI-SDAG-NEXT:    v_add_f32_e32 v3, v3, v4
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s9
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s[0:1]
-; SI-SDAG-NEXT:    s_mov_b32 s6, -1
-; SI-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v2
-; SI-SDAG-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v5|, s9
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc
+; SI-SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; SI-SDAG-NEXT:    s_mov_b32 s2, -1
+; SI-SDAG-NEXT:    v_sub_f32_e32 v0, v2, v0
+; SI-SDAG-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; SI-SDAG-NEXT:    s_endpgm
 ;
 ; SI-GISEL-LABEL: s_log10_v2f32:
@@ -398,49 +403,51 @@ define amdgpu_kernel void @s_log10_v2f32(ptr addrspace(1) %out, <2 x float> %in)
 ;
 ; VI-SDAG-LABEL: s_log10_v2f32:
 ; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x24
+; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
-; VI-SDAG-NEXT:    s_mov_b32 s2, 0x7f800000
+; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x411a209b
 ; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s7, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; VI-SDAG-NEXT:    v_ldexp_f32 v1, s7, v1
-; VI-SDAG-NEXT:    v_log_f32_e32 v1, v1
-; VI-SDAG-NEXT:    v_and_b32_e32 v2, 0xfffff000, v1
-; VI-SDAG-NEXT:    v_sub_f32_e32 v3, v1, v2
-; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x369a84fb, v2
-; VI-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3e9a2000, v3
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v0
+; VI-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
+; VI-SDAG-NEXT:    s_cselect_b32 s4, 32, 0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s4
+; VI-SDAG-NEXT:    v_ldexp_f32 v3, s3, v3
+; VI-SDAG-NEXT:    v_log_f32_e32 v3, v3
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
+; VI-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
+; VI-SDAG-NEXT:    v_and_b32_e32 v4, 0xfffff000, v3
+; VI-SDAG-NEXT:    v_sub_f32_e32 v5, v3, v4
+; VI-SDAG-NEXT:    v_mul_f32_e32 v6, 0x369a84fb, v4
+; VI-SDAG-NEXT:    v_mul_f32_e32 v7, 0x3e9a2000, v5
+; VI-SDAG-NEXT:    v_mul_f32_e32 v5, 0x369a84fb, v5
+; VI-SDAG-NEXT:    s_cselect_b32 s4, 32, 0
+; VI-SDAG-NEXT:    v_add_f32_e32 v5, v6, v5
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s4
+; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3e9a2000, v4
+; VI-SDAG-NEXT:    v_add_f32_e32 v5, v7, v5
+; VI-SDAG-NEXT:    v_ldexp_f32 v1, s2, v1
+; VI-SDAG-NEXT:    v_add_f32_e32 v4, v4, v5
+; VI-SDAG-NEXT:    v_log_f32_e32 v5, v1
+; VI-SDAG-NEXT:    s_mov_b32 s3, 0x7f800000
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v3|, s3
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
+; VI-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v2
+; VI-SDAG-NEXT:    v_and_b32_e32 v2, 0xfffff000, v5
+; VI-SDAG-NEXT:    v_sub_f32_e32 v3, v5, v2
+; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3e9a2000, v3
 ; VI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x369a84fb, v3
+; VI-SDAG-NEXT:    v_mul_f32_e32 v6, 0x369a84fb, v2
+; VI-SDAG-NEXT:    v_add_f32_e32 v3, v6, v3
 ; VI-SDAG-NEXT:    v_add_f32_e32 v3, v4, v3
 ; VI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3e9a2000, v2
-; VI-SDAG-NEXT:    v_add_f32_e32 v3, v5, v3
 ; VI-SDAG-NEXT:    v_add_f32_e32 v2, v2, v3
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v1|, s2
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[0:1]
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], s6, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
-; VI-SDAG-NEXT:    v_ldexp_f32 v0, s6, v0
-; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
-; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x411a209b
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v3, 0, v2, vcc
-; VI-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v3
-; VI-SDAG-NEXT:    v_and_b32_e32 v3, 0xfffff000, v0
-; VI-SDAG-NEXT:    v_sub_f32_e32 v4, v0, v3
-; VI-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3e9a2000, v4
-; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x369a84fb, v4
-; VI-SDAG-NEXT:    v_mul_f32_e32 v6, 0x369a84fb, v3
-; VI-SDAG-NEXT:    v_add_f32_e32 v4, v6, v4
-; VI-SDAG-NEXT:    v_add_f32_e32 v4, v5, v4
-; VI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3e9a2000, v3
-; VI-SDAG-NEXT:    v_add_f32_e32 v3, v3, v4
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s2
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s[0:1]
-; VI-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v2
-; VI-SDAG-NEXT:    v_mov_b32_e32 v2, s4
-; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v5|, s3
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc
+; VI-SDAG-NEXT:    v_sub_f32_e32 v0, v2, v0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s1
+; VI-SDAG-NEXT:    v_mov_b32_e32 v2, s0
 ; VI-SDAG-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
 ; VI-SDAG-NEXT:    s_endpgm
 ;
@@ -494,41 +501,43 @@ define amdgpu_kernel void @s_log10_v2f32(ptr addrspace(1) %out, <2 x float> %in)
 ;
 ; GFX900-SDAG-LABEL: s_log10_v2f32:
 ; GFX900-SDAG:       ; %bb.0:
-; GFX900-SDAG-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x24
+; GFX900-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
-; GFX900-SDAG-NEXT:    s_mov_b32 s2, 0x3e9a209a
-; GFX900-SDAG-NEXT:    s_mov_b32 s3, 0x3284fbcf
-; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x7f800000
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x411a209b
+; GFX900-SDAG-NEXT:    s_mov_b32 s6, 0x3284fbcf
+; GFX900-SDAG-NEXT:    s_mov_b32 s7, 0x7f800000
 ; GFX900-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s11, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; GFX900-SDAG-NEXT:    v_ldexp_f32 v1, s11, v1
-; GFX900-SDAG-NEXT:    v_log_f32_e32 v1, v1
-; GFX900-SDAG-NEXT:    v_mov_b32_e32 v2, 0
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3e9a209a, v1
-; GFX900-SDAG-NEXT:    v_fma_f32 v4, v1, s2, -v3
-; GFX900-SDAG-NEXT:    v_fma_f32 v4, v1, s3, v4
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v0
+; GFX900-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
+; GFX900-SDAG-NEXT:    s_cselect_b32 s4, 32, 0
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v4, s4
+; GFX900-SDAG-NEXT:    v_ldexp_f32 v4, s3, v4
+; GFX900-SDAG-NEXT:    v_log_f32_e32 v4, v4
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v3, 0, v1, vcc
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
+; GFX900-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
+; GFX900-SDAG-NEXT:    s_mov_b32 s3, 0x3e9a209a
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3e9a209a, v4
+; GFX900-SDAG-NEXT:    s_cselect_b32 s4, 32, 0
+; GFX900-SDAG-NEXT:    v_fma_f32 v6, v4, s3, -v5
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, s4
+; GFX900-SDAG-NEXT:    v_fma_f32 v6, v4, s6, v6
+; GFX900-SDAG-NEXT:    v_ldexp_f32 v1, s2, v1
+; GFX900-SDAG-NEXT:    v_add_f32_e32 v5, v5, v6
+; GFX900-SDAG-NEXT:    v_log_f32_e32 v6, v1
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v4|, s7
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v1, v4, v5, vcc
+; GFX900-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v3
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3e9a209a, v6
+; GFX900-SDAG-NEXT:    v_fma_f32 v4, v6, s3, -v3
+; GFX900-SDAG-NEXT:    v_fma_f32 v4, v6, s6, v4
 ; GFX900-SDAG-NEXT:    v_add_f32_e32 v3, v3, v4
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v1|, s4
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s[0:1]
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], s10, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
-; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, s10, v0
-; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
-; GFX900-SDAG-NEXT:    v_mov_b32_e32 v3, 0x411a209b
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v3, vcc
-; GFX900-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v4
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3e9a209a, v0
-; GFX900-SDAG-NEXT:    v_fma_f32 v5, v0, s2, -v4
-; GFX900-SDAG-NEXT:    v_fma_f32 v5, v0, s3, v5
-; GFX900-SDAG-NEXT:    v_add_f32_e32 v4, v4, v5
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, v3, s[0:1]
-; GFX900-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v3
-; GFX900-SDAG-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v6|, s7
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; GFX900-SDAG-NEXT:    v_sub_f32_e32 v0, v3, v0
+; GFX900-SDAG-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX900-SDAG-NEXT:    s_endpgm
 ;
 ; GFX900-GISEL-LABEL: s_log10_v2f32:
@@ -574,39 +583,37 @@ define amdgpu_kernel void @s_log10_v2f32(ptr addrspace(1) %out, <2 x float> %in)
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
 ; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s5, 0x800000, s2
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s4, 0x800000, s3
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s5
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; GFX1100-SDAG-NEXT:    v_ldexp_f32 v1, s2, v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s5, 0x800000, s2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 0x411a209b, s4
+; GFX1100-SDAG-NEXT:    s_and_b32 s4, s4, exec_lo
+; GFX1100-SDAG-NEXT:    s_cselect_b32 s4, 32, 0
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 0x411a209b, s5
+; GFX1100-SDAG-NEXT:    v_ldexp_f32 v1, s3, s4
+; GFX1100-SDAG-NEXT:    s_and_b32 s5, s5, exec_lo
+; GFX1100-SDAG-NEXT:    s_cselect_b32 s5, 32, 0
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT:    v_ldexp_f32 v3, s2, s5
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v1, v1
-; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_dual_mul_f32 v3, 0x3e9a209a, v1 :: v_dual_lshlrev_b32 v0, 5, v0
-; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, s3, v0
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1100-SDAG-NEXT:    v_fma_f32 v5, 0x3e9a209a, v1, -v3
-; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_3)
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v5, 0x3284fbcf, v1
+; GFX1100-SDAG-NEXT:    v_log_f32_e32 v3, v3
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_dual_add_f32 v3, v3, v5 :: v_dual_mul_f32 v2, 0x3e9a209a, v0
-; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v5, 0, 0x411a209b, s5
-; GFX1100-SDAG-NEXT:    v_fma_f32 v4, 0x3e9a209a, v0, -v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v4, 0x3284fbcf, v0
-; GFX1100-SDAG-NEXT:    v_add_f32_e32 v2, v2, v4
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 0x411a209b, s4
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
+; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3e9a209a, v1
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1|
-; GFX1100-SDAG-NEXT:    v_dual_cndmask_b32 v2, v1, v3 :: v_dual_mov_b32 v3, 0
-; GFX1100-SDAG-NEXT:    v_dual_sub_f32 v1, v0, v4 :: v_dual_sub_f32 v0, v2, v5
-; GFX1100-SDAG-NEXT:    global_store_b64 v3, v[0:1], s[0:1]
+; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3e9a209a, v3
+; GFX1100-SDAG-NEXT:    v_fma_f32 v6, 0x3e9a209a, v1, -v4
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_fma_f32 v7, 0x3e9a209a, v3, -v5
+; GFX1100-SDAG-NEXT:    v_dual_fmac_f32 v6, 0x3284fbcf, v1 :: v_dual_fmac_f32 v7, 0x3284fbcf, v3
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_dual_add_f32 v4, v4, v6 :: v_dual_add_f32 v5, v5, v7
+; GFX1100-SDAG-NEXT:    v_dual_cndmask_b32 v1, v1, v4 :: v_dual_mov_b32 v4, 0
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v3|
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc_lo
+; GFX1100-SDAG-NEXT:    v_dual_sub_f32 v1, v1, v0 :: v_dual_sub_f32 v0, v3, v2
+; GFX1100-SDAG-NEXT:    global_store_b64 v4, v[0:1], s[0:1]
 ; GFX1100-SDAG-NEXT:    s_endpgm
 ;
 ; GFX1100-GISEL-LABEL: s_log10_v2f32:
@@ -762,56 +769,59 @@ define amdgpu_kernel void @s_log10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ; SI-SDAG-LABEL: s_log10_v3f32:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0xd
-; SI-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9
+; SI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
+; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x411a209b
+; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3e9a209a
 ; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-SDAG-NEXT:    s_mov_b32 s11, 0x3284fbcf
-; SI-SDAG-NEXT:    s_mov_b32 s12, 0x7f800000
 ; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s9, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; SI-SDAG-NEXT:    v_ldexp_f32_e32 v1, s9, v1
-; SI-SDAG-NEXT:    v_log_f32_e32 v1, v1
-; SI-SDAG-NEXT:    s_mov_b32 s9, 0x3e9a209a
-; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
-; SI-SDAG-NEXT:    s_mov_b32 s6, -1
-; SI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3e9a209a, v1
-; SI-SDAG-NEXT:    v_fma_f32 v3, v1, s9, -v2
-; SI-SDAG-NEXT:    v_fma_f32 v3, v1, s11, v3
-; SI-SDAG-NEXT:    v_add_f32_e32 v2, v2, v3
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v1|, s12
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[0:1]
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], s8, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[0:1]
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
-; SI-SDAG-NEXT:    v_ldexp_f32_e32 v2, s8, v2
-; SI-SDAG-NEXT:    v_log_f32_e32 v2, v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v3, 0x411a209b
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v3, vcc
+; SI-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; SI-SDAG-NEXT:    s_cselect_b32 s2, 32, 0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v3, s2
+; SI-SDAG-NEXT:    v_ldexp_f32_e32 v3, s9, v3
+; SI-SDAG-NEXT:    v_log_f32_e32 v3, v3
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s8, v0
+; SI-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
+; SI-SDAG-NEXT:    s_cselect_b32 s4, 32, 0
+; SI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3e9a209a, v3
+; SI-SDAG-NEXT:    v_mov_b32_e32 v6, s4
+; SI-SDAG-NEXT:    v_fma_f32 v5, v3, s6, -v4
+; SI-SDAG-NEXT:    s_mov_b32 s7, 0x3284fbcf
+; SI-SDAG-NEXT:    v_ldexp_f32_e32 v6, s8, v6
+; SI-SDAG-NEXT:    v_fma_f32 v5, v3, s7, v5
+; SI-SDAG-NEXT:    s_mov_b32 s9, 0x7f800000
+; SI-SDAG-NEXT:    v_log_f32_e32 v6, v6
+; SI-SDAG-NEXT:    v_add_f32_e32 v4, v4, v5
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v2, vcc
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v3|, s9
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s10, v0
-; SI-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v4
-; SI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3e9a209a, v2
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SI-SDAG-NEXT:    v_fma_f32 v5, v2, s9, -v4
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
-; SI-SDAG-NEXT:    v_fma_f32 v5, v2, s11, v5
+; SI-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
+; SI-SDAG-NEXT:    v_sub_f32_e32 v1, v3, v1
+; SI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3e9a209a, v6
+; SI-SDAG-NEXT:    s_cselect_b32 s4, 32, 0
+; SI-SDAG-NEXT:    v_fma_f32 v4, v6, s6, -v3
+; SI-SDAG-NEXT:    v_mov_b32_e32 v0, s4
+; SI-SDAG-NEXT:    v_fma_f32 v4, v6, s7, v4
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, s10, v0
-; SI-SDAG-NEXT:    v_add_f32_e32 v4, v4, v5
-; SI-SDAG-NEXT:    v_log_f32_e32 v5, v0
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[2:3], |v2|, s12
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v0, v2, v4, s[2:3]
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, v3, s[0:1]
-; SI-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v2
-; SI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3e9a209a, v5
-; SI-SDAG-NEXT:    v_fma_f32 v4, v5, s9, -v2
-; SI-SDAG-NEXT:    v_fma_f32 v4, v5, s11, v4
-; SI-SDAG-NEXT:    v_add_f32_e32 v2, v2, v4
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v5|, s12
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, v5, v2, s[0:1]
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
-; SI-SDAG-NEXT:    v_sub_f32_e32 v2, v2, v3
-; SI-SDAG-NEXT:    buffer_store_dword v2, off, s[4:7], 0 offset:8
-; SI-SDAG-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; SI-SDAG-NEXT:    v_add_f32_e32 v3, v3, v4
+; SI-SDAG-NEXT:    v_log_f32_e32 v4, v0
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v6|, s9
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v6, v3, vcc
+; SI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3e9a209a, v4
+; SI-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v5
+; SI-SDAG-NEXT:    v_fma_f32 v5, v4, s6, -v3
+; SI-SDAG-NEXT:    v_fma_f32 v5, v4, s7, v5
+; SI-SDAG-NEXT:    v_add_f32_e32 v3, v3, v5
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v4|, s9
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
+; SI-SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; SI-SDAG-NEXT:    s_mov_b32 s2, -1
+; SI-SDAG-NEXT:    v_sub_f32_e32 v2, v3, v2
+; SI-SDAG-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:8
+; SI-SDAG-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; SI-SDAG-NEXT:    s_endpgm
 ;
 ; SI-GISEL-LABEL: s_log10_v3f32:
@@ -871,55 +881,59 @@ define amdgpu_kernel void @s_log10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ;
 ; VI-SDAG-LABEL: s_log10_v3f32:
 ; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x34
+; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
-; VI-SDAG-NEXT:    s_mov_b32 s6, 0x7f800000
-; VI-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x24
+; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x411a209b
 ; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s10, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; VI-SDAG-NEXT:    v_ldexp_f32 v1, s10, v1
-; VI-SDAG-NEXT:    v_log_f32_e32 v1, v1
-; VI-SDAG-NEXT:    v_and_b32_e32 v2, 0xfffff000, v1
-; VI-SDAG-NEXT:    v_sub_f32_e32 v3, v1, v2
-; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x369a84fb, v2
-; VI-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3e9a2000, v3
-; VI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x369a84fb, v3
-; VI-SDAG-NEXT:    v_add_f32_e32 v3, v4, v3
-; VI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3e9a2000, v2
-; VI-SDAG-NEXT:    v_add_f32_e32 v3, v5, v3
-; VI-SDAG-NEXT:    v_add_f32_e32 v2, v2, v3
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v1|, s6
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[0:1]
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], s9, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[0:1]
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
-; VI-SDAG-NEXT:    v_ldexp_f32 v2, s9, v2
-; VI-SDAG-NEXT:    v_log_f32_e32 v3, v2
-; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x411a209b
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v4, vcc
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s8, v0
-; VI-SDAG-NEXT:    v_sub_f32_e32 v2, v1, v2
-; VI-SDAG-NEXT:    v_and_b32_e32 v1, 0xfffff000, v3
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; VI-SDAG-NEXT:    v_sub_f32_e32 v5, v3, v1
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
-; VI-SDAG-NEXT:    v_mul_f32_e32 v6, 0x3e9a2000, v5
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
+; VI-SDAG-NEXT:    s_and_b64 s[6:7], vcc, exec
+; VI-SDAG-NEXT:    s_cselect_b32 s3, 32, 0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s3
+; VI-SDAG-NEXT:    v_ldexp_f32 v3, s2, v3
+; VI-SDAG-NEXT:    v_log_f32_e32 v3, v3
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v0
+; VI-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
+; VI-SDAG-NEXT:    v_and_b32_e32 v4, 0xfffff000, v3
+; VI-SDAG-NEXT:    v_sub_f32_e32 v5, v3, v4
+; VI-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
+; VI-SDAG-NEXT:    v_mul_f32_e32 v6, 0x369a84fb, v4
+; VI-SDAG-NEXT:    v_mul_f32_e32 v7, 0x3e9a2000, v5
 ; VI-SDAG-NEXT:    v_mul_f32_e32 v5, 0x369a84fb, v5
-; VI-SDAG-NEXT:    v_mul_f32_e32 v7, 0x369a84fb, v1
-; VI-SDAG-NEXT:    v_ldexp_f32 v0, s8, v0
-; VI-SDAG-NEXT:    v_add_f32_e32 v5, v7, v5
-; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT:    s_cselect_b32 s4, 32, 0
 ; VI-SDAG-NEXT:    v_add_f32_e32 v5, v6, v5
-; VI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3e9a2000, v1
-; VI-SDAG-NEXT:    v_add_f32_e32 v1, v1, v5
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[2:3], |v3|, s6
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v3, v1, s[2:3]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, v4, s[0:1]
-; VI-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v3
-; VI-SDAG-NEXT:    v_and_b32_e32 v3, 0xfffff000, v0
-; VI-SDAG-NEXT:    v_sub_f32_e32 v5, v0, v3
+; VI-SDAG-NEXT:    v_mov_b32_e32 v6, s4
+; VI-SDAG-NEXT:    v_ldexp_f32 v6, s1, v6
+; VI-SDAG-NEXT:    v_log_f32_e32 v6, v6
+; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3e9a2000, v4
+; VI-SDAG-NEXT:    v_add_f32_e32 v5, v7, v5
+; VI-SDAG-NEXT:    s_mov_b32 s6, 0x7f800000
+; VI-SDAG-NEXT:    v_add_f32_e32 v4, v4, v5
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v1, vcc
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v3|, s6
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
+; VI-SDAG-NEXT:    v_sub_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT:    v_and_b32_e32 v3, 0xfffff000, v6
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v0
+; VI-SDAG-NEXT:    v_sub_f32_e32 v4, v6, v3
+; VI-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
+; VI-SDAG-NEXT:    v_mul_f32_e32 v7, 0x3e9a2000, v4
+; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x369a84fb, v4
+; VI-SDAG-NEXT:    v_mul_f32_e32 v8, 0x369a84fb, v3
+; VI-SDAG-NEXT:    s_cselect_b32 s1, 32, 0
+; VI-SDAG-NEXT:    v_add_f32_e32 v4, v8, v4
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; VI-SDAG-NEXT:    v_add_f32_e32 v4, v7, v4
+; VI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3e9a2000, v3
+; VI-SDAG-NEXT:    v_ldexp_f32 v1, s0, v1
+; VI-SDAG-NEXT:    v_add_f32_e32 v3, v3, v4
+; VI-SDAG-NEXT:    v_log_f32_e32 v4, v1
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v6|, s6
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
+; VI-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v5
+; VI-SDAG-NEXT:    v_and_b32_e32 v3, 0xfffff000, v4
+; VI-SDAG-NEXT:    v_sub_f32_e32 v5, v4, v3
 ; VI-SDAG-NEXT:    v_mul_f32_e32 v6, 0x3e9a2000, v5
 ; VI-SDAG-NEXT:    v_mul_f32_e32 v5, 0x369a84fb, v5
 ; VI-SDAG-NEXT:    v_mul_f32_e32 v7, 0x369a84fb, v3
@@ -927,12 +941,12 @@ define amdgpu_kernel void @s_log10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ; VI-SDAG-NEXT:    v_add_f32_e32 v5, v6, v5
 ; VI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3e9a2000, v3
 ; VI-SDAG-NEXT:    v_add_f32_e32 v3, v3, v5
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v0|, s6
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v3, s[0:1]
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v3, 0, v4, vcc
-; VI-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v3
-; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s4
-; VI-SDAG-NEXT:    v_mov_b32_e32 v4, s5
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v4|, s6
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
+; VI-SDAG-NEXT:    v_sub_f32_e32 v0, v3, v0
+; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT:    v_mov_b32_e32 v4, s3
+; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s2
 ; VI-SDAG-NEXT:    flat_store_dwordx3 v[3:4], v[0:2]
 ; VI-SDAG-NEXT:    s_endpgm
 ;
@@ -1005,55 +1019,58 @@ define amdgpu_kernel void @s_log10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ;
 ; GFX900-SDAG-LABEL: s_log10_v3f32:
 ; GFX900-SDAG:       ; %bb.0:
-; GFX900-SDAG-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x34
+; GFX900-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
 ; GFX900-SDAG-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x24
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x411a209b
+; GFX900-SDAG-NEXT:    s_mov_b32 s8, 0x7f800000
+; GFX900-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
+; GFX900-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
+; GFX900-SDAG-NEXT:    s_cselect_b32 s3, 32, 0
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v3, s3
+; GFX900-SDAG-NEXT:    v_ldexp_f32 v3, s2, v3
+; GFX900-SDAG-NEXT:    v_log_f32_e32 v3, v3
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v0
+; GFX900-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; GFX900-SDAG-NEXT:    s_cselect_b32 s2, 32, 0
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x3e9a209a
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3e9a209a, v3
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v7, s2
 ; GFX900-SDAG-NEXT:    s_mov_b32 s5, 0x3284fbcf
-; GFX900-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s10, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; GFX900-SDAG-NEXT:    v_ldexp_f32 v1, s10, v1
-; GFX900-SDAG-NEXT:    v_log_f32_e32 v1, v1
-; GFX900-SDAG-NEXT:    s_mov_b32 s10, 0x7f800000
-; GFX900-SDAG-NEXT:    v_mov_b32_e32 v5, 0x411a209b
-; GFX900-SDAG-NEXT:    v_mov_b32_e32 v3, 0
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3e9a209a, v1
-; GFX900-SDAG-NEXT:    v_fma_f32 v4, v1, s4, -v2
-; GFX900-SDAG-NEXT:    v_fma_f32 v4, v1, s5, v4
-; GFX900-SDAG-NEXT:    v_add_f32_e32 v2, v2, v4
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v1|, s10
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[0:1]
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], s9, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[0:1]
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
-; GFX900-SDAG-NEXT:    v_ldexp_f32 v2, s9, v2
-; GFX900-SDAG-NEXT:    v_log_f32_e32 v4, v2
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v5, vcc
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s8, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
-; GFX900-SDAG-NEXT:    v_sub_f32_e32 v2, v1, v2
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3e9a209a, v4
-; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, s8, v0
-; GFX900-SDAG-NEXT:    v_fma_f32 v6, v4, s4, -v1
-; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
-; GFX900-SDAG-NEXT:    v_fma_f32 v6, v4, s5, v6
-; GFX900-SDAG-NEXT:    v_add_f32_e32 v1, v1, v6
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 s[2:3], |v4|, s10
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, v4, v1, s[2:3]
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, v5, s[0:1]
-; GFX900-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v4
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3e9a209a, v0
-; GFX900-SDAG-NEXT:    v_fma_f32 v6, v0, s4, -v4
-; GFX900-SDAG-NEXT:    v_fma_f32 v6, v0, s5, v6
-; GFX900-SDAG-NEXT:    v_add_f32_e32 v4, v4, v6
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v0|, s10
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s[0:1]
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v5, vcc
-; GFX900-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v4
-; GFX900-SDAG-NEXT:    global_store_dwordx3 v3, v[0:2], s[6:7]
+; GFX900-SDAG-NEXT:    v_fma_f32 v6, v3, s4, -v5
+; GFX900-SDAG-NEXT:    v_ldexp_f32 v7, s1, v7
+; GFX900-SDAG-NEXT:    v_fma_f32 v6, v3, s5, v6
+; GFX900-SDAG-NEXT:    v_log_f32_e32 v7, v7
+; GFX900-SDAG-NEXT:    v_add_f32_e32 v5, v5, v6
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v6, 0, v1, vcc
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v3|, s8
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v0
+; GFX900-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; GFX900-SDAG-NEXT:    v_sub_f32_e32 v2, v3, v2
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3e9a209a, v7
+; GFX900-SDAG-NEXT:    s_cselect_b32 s1, 32, 0
+; GFX900-SDAG-NEXT:    v_fma_f32 v5, v7, s4, -v3
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX900-SDAG-NEXT:    v_fma_f32 v5, v7, s5, v5
+; GFX900-SDAG-NEXT:    v_ldexp_f32 v1, s0, v1
+; GFX900-SDAG-NEXT:    v_add_f32_e32 v3, v3, v5
+; GFX900-SDAG-NEXT:    v_log_f32_e32 v5, v1
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v7|, s8
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
+; GFX900-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v6
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3e9a209a, v5
+; GFX900-SDAG-NEXT:    v_fma_f32 v6, v5, s4, -v3
+; GFX900-SDAG-NEXT:    v_fma_f32 v6, v5, s5, v6
+; GFX900-SDAG-NEXT:    v_add_f32_e32 v3, v3, v6
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v5|, s8
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v4, 0
+; GFX900-SDAG-NEXT:    v_sub_f32_e32 v0, v3, v0
+; GFX900-SDAG-NEXT:    global_store_dwordx3 v4, v[0:2], s[6:7]
 ; GFX900-SDAG-NEXT:    s_endpgm
 ;
 ; GFX900-GISEL-LABEL: s_log10_v3f32:
@@ -1113,60 +1130,52 @@ define amdgpu_kernel void @s_log10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x34
 ; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s7, 0x800000, s0
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s3, 0x800000, s2
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s6, 0x800000, s1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s7
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s3
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s6
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v9, 0, 0x411a209b, s3
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v10, 0, 0x411a209b, s6
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
-; GFX1100-SDAG-NEXT:    v_ldexp_f32 v2, s0, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s7, 0x800000, s0
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 0x411a209b, s3
+; GFX1100-SDAG-NEXT:    s_and_b32 s3, s3, exec_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x411a209b, s6
+; GFX1100-SDAG-NEXT:    s_cselect_b32 s3, 32, 0
+; GFX1100-SDAG-NEXT:    s_and_b32 s6, s6, exec_lo
+; GFX1100-SDAG-NEXT:    s_cselect_b32 s6, 32, 0
+; GFX1100-SDAG-NEXT:    v_ldexp_f32 v2, s2, s3
+; GFX1100-SDAG-NEXT:    v_ldexp_f32 v4, s1, s6
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, 0x411a209b, s7
+; GFX1100-SDAG-NEXT:    s_and_b32 s7, s7, exec_lo
+; GFX1100-SDAG-NEXT:    s_cselect_b32 s2, 32, 0
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v2, v2
-; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_dual_mul_f32 v5, 0x3e9a209a, v2 :: v_dual_lshlrev_b32 v0, 5, v0
-; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, s2, v0
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1100-SDAG-NEXT:    v_fma_f32 v8, 0x3e9a209a, v2, -v5
-; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v8, 0x3284fbcf, v2
-; GFX1100-SDAG-NEXT:    v_add_f32_e32 v5, v5, v8
-; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3e9a209a, v0
-; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_fma_f32 v6, 0x3e9a209a, v0, -v3
-; GFX1100-SDAG-NEXT:    v_dual_fmac_f32 v6, 0x3284fbcf, v0 :: v_dual_lshlrev_b32 v1, 5, v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-SDAG-NEXT:    v_ldexp_f32 v1, s1, v1
+; GFX1100-SDAG-NEXT:    v_log_f32_e32 v4, v4
+; GFX1100-SDAG-NEXT:    v_ldexp_f32 v5, s0, s2
 ; GFX1100-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1100-SDAG-NEXT:    v_add_f32_e32 v3, v3, v6
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_log_f32_e32 v1, v1
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v6, 0, 0x411a209b, s7
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT:    v_log_f32_e32 v5, v5
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3e9a209a, v1
-; GFX1100-SDAG-NEXT:    v_fma_f32 v7, 0x3e9a209a, v1, -v4
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v7, 0x3284fbcf, v1
-; GFX1100-SDAG-NEXT:    v_add_f32_e32 v4, v4, v7
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
-; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1|
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc_lo
+; GFX1100-SDAG-NEXT:    v_dual_mul_f32 v6, 0x3e9a209a, v2 :: v_dual_mul_f32 v7, 0x3e9a209a, v4
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2|
-; GFX1100-SDAG-NEXT:    v_dual_mov_b32 v4, 0 :: v_dual_sub_f32 v1, v1, v10
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v3, v2, v5, vcc_lo
-; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v2, v0, v9
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v3, v6
+; GFX1100-SDAG-NEXT:    v_fma_f32 v9, 0x3e9a209a, v2, -v6
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_fma_f32 v10, 0x3e9a209a, v4, -v7
+; GFX1100-SDAG-NEXT:    v_dual_fmac_f32 v9, 0x3284fbcf, v2 :: v_dual_fmac_f32 v10, 0x3284fbcf, v4
+; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v8, 0x3e9a209a, v5
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT:    v_dual_add_f32 v6, v6, v9 :: v_dual_add_f32 v7, v7, v10
+; GFX1100-SDAG-NEXT:    v_fma_f32 v11, 0x3e9a209a, v5, -v8
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v4|
+; GFX1100-SDAG-NEXT:    v_dual_fmac_f32 v11, 0x3284fbcf, v5 :: v_dual_sub_f32 v2, v2, v0
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc_lo
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX1100-SDAG-NEXT:    v_add_f32_e32 v8, v8, v11
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v5|
+; GFX1100-SDAG-NEXT:    v_dual_mov_b32 v6, 0 :: v_dual_sub_f32 v1, v4, v1
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc_lo
+; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v5, v3
 ; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1100-SDAG-NEXT:    global_store_b96 v4, v[0:2], s[0:1]
+; GFX1100-SDAG-NEXT:    global_store_b96 v6, v[0:2], s[0:1]
 ; GFX1100-SDAG-NEXT:    s_endpgm
 ;
 ; GFX1100-GISEL-LABEL: s_log10_v3f32:
@@ -1387,68 +1396,72 @@ define amdgpu_kernel void @s_log10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; SI-SDAG-LABEL: s_log10_v4f32:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0xd
-; SI-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9
+; SI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
-; SI-SDAG-NEXT:    s_mov_b32 s12, 0x3284fbcf
-; SI-SDAG-NEXT:    s_mov_b32 s13, 0x7f800000
+; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x411a209b
+; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3e9a209a
 ; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s11, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; SI-SDAG-NEXT:    v_ldexp_f32_e32 v1, s11, v1
-; SI-SDAG-NEXT:    v_log_f32_e32 v1, v1
-; SI-SDAG-NEXT:    s_mov_b32 s11, 0x3e9a209a
-; SI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x411a209b
-; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
-; SI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3e9a209a, v1
-; SI-SDAG-NEXT:    v_fma_f32 v3, v1, s11, -v2
-; SI-SDAG-NEXT:    v_fma_f32 v3, v1, s12, v3
-; SI-SDAG-NEXT:    v_add_f32_e32 v2, v2, v3
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v1|, s13
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[0:1]
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], s10, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[0:1]
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
-; SI-SDAG-NEXT:    v_ldexp_f32_e32 v2, s10, v2
-; SI-SDAG-NEXT:    v_log_f32_e32 v2, v2
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v3, 0, v4, vcc
-; SI-SDAG-NEXT:    v_sub_f32_e32 v3, v1, v3
+; SI-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; SI-SDAG-NEXT:    s_cselect_b32 s2, 32, 0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v3, s2
+; SI-SDAG-NEXT:    v_ldexp_f32_e32 v3, s11, v3
+; SI-SDAG-NEXT:    v_log_f32_e32 v3, v3
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s10, v0
+; SI-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
+; SI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3e9a209a, v3
+; SI-SDAG-NEXT:    s_cselect_b32 s4, 32, 0
+; SI-SDAG-NEXT:    v_fma_f32 v5, v3, s6, -v4
+; SI-SDAG-NEXT:    s_mov_b32 s7, 0x3284fbcf
+; SI-SDAG-NEXT:    v_mov_b32_e32 v6, s4
+; SI-SDAG-NEXT:    v_fma_f32 v5, v3, s7, v5
+; SI-SDAG-NEXT:    s_mov_b32 s11, 0x7f800000
+; SI-SDAG-NEXT:    v_ldexp_f32_e32 v6, s10, v6
+; SI-SDAG-NEXT:    v_add_f32_e32 v4, v4, v5
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v1, vcc
+; SI-SDAG-NEXT:    v_log_f32_e32 v6, v6
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v3|, s11
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s9, v0
-; SI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3e9a209a, v2
-; SI-SDAG-NEXT:    v_fma_f32 v5, v2, s11, -v1
-; SI-SDAG-NEXT:    v_fma_f32 v5, v2, s12, v5
-; SI-SDAG-NEXT:    v_add_f32_e32 v1, v1, v5
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v5, 5, v5
-; SI-SDAG-NEXT:    v_ldexp_f32_e32 v5, s9, v5
-; SI-SDAG-NEXT:    v_log_f32_e32 v5, v5
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[2:3], |v2|, s13
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v2, v1, s[2:3]
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, v4, s[0:1]
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], s8, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
-; SI-SDAG-NEXT:    v_sub_f32_e32 v2, v1, v2
-; SI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3e9a209a, v5
-; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, s8, v0
-; SI-SDAG-NEXT:    v_fma_f32 v6, v5, s11, -v1
-; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
-; SI-SDAG-NEXT:    v_fma_f32 v6, v5, s12, v6
-; SI-SDAG-NEXT:    v_add_f32_e32 v1, v1, v6
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[2:3], |v5|, s13
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v5, v1, s[2:3]
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v4, vcc
-; SI-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v5
-; SI-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3e9a209a, v0
-; SI-SDAG-NEXT:    v_fma_f32 v6, v0, s11, -v5
-; SI-SDAG-NEXT:    v_fma_f32 v6, v0, s12, v6
+; SI-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
+; SI-SDAG-NEXT:    s_cselect_b32 s4, 32, 0
+; SI-SDAG-NEXT:    v_sub_f32_e32 v3, v3, v2
+; SI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3e9a209a, v6
+; SI-SDAG-NEXT:    v_mov_b32_e32 v7, s4
+; SI-SDAG-NEXT:    v_fma_f32 v4, v6, s6, -v2
+; SI-SDAG-NEXT:    v_ldexp_f32_e32 v7, s9, v7
+; SI-SDAG-NEXT:    v_fma_f32 v4, v6, s7, v4
+; SI-SDAG-NEXT:    v_log_f32_e32 v7, v7
+; SI-SDAG-NEXT:    v_add_f32_e32 v2, v2, v4
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v1, vcc
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v6|, s11
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s8, v0
+; SI-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
+; SI-SDAG-NEXT:    v_sub_f32_e32 v2, v2, v5
+; SI-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3e9a209a, v7
+; SI-SDAG-NEXT:    s_cselect_b32 s4, 32, 0
+; SI-SDAG-NEXT:    v_fma_f32 v6, v7, s6, -v5
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; SI-SDAG-NEXT:    v_mov_b32_e32 v1, s4
+; SI-SDAG-NEXT:    v_fma_f32 v6, v7, s7, v6
+; SI-SDAG-NEXT:    v_ldexp_f32_e32 v1, s8, v1
 ; SI-SDAG-NEXT:    v_add_f32_e32 v5, v5, v6
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s13
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[0:1]
-; SI-SDAG-NEXT:    s_mov_b32 s6, -1
-; SI-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v4
-; SI-SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
+; SI-SDAG-NEXT:    v_log_f32_e32 v6, v1
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v7|, s11
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v7, v5, vcc
+; SI-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v4
+; SI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3e9a209a, v6
+; SI-SDAG-NEXT:    v_fma_f32 v5, v6, s6, -v4
+; SI-SDAG-NEXT:    v_fma_f32 v5, v6, s7, v5
+; SI-SDAG-NEXT:    v_add_f32_e32 v4, v4, v5
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v6|, s11
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
+; SI-SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; SI-SDAG-NEXT:    s_mov_b32 s2, -1
+; SI-SDAG-NEXT:    v_sub_f32_e32 v0, v4, v0
+; SI-SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; SI-SDAG-NEXT:    s_endpgm
 ;
 ; SI-GISEL-LABEL: s_log10_v4f32:
@@ -1520,84 +1533,88 @@ define amdgpu_kernel void @s_log10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ;
 ; VI-SDAG-LABEL: s_log10_v4f32:
 ; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x34
+; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
-; VI-SDAG-NEXT:    s_mov_b32 s6, 0x7f800000
+; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x411a209b
+; VI-SDAG-NEXT:    s_mov_b32 s8, 0x7f800000
 ; VI-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x24
 ; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s11, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; VI-SDAG-NEXT:    v_ldexp_f32 v1, s11, v1
-; VI-SDAG-NEXT:    v_log_f32_e32 v1, v1
-; VI-SDAG-NEXT:    v_and_b32_e32 v2, 0xfffff000, v1
-; VI-SDAG-NEXT:    v_sub_f32_e32 v3, v1, v2
-; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x369a84fb, v2
-; VI-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3e9a2000, v3
-; VI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x369a84fb, v3
-; VI-SDAG-NEXT:    v_add_f32_e32 v3, v4, v3
-; VI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3e9a2000, v2
-; VI-SDAG-NEXT:    v_add_f32_e32 v3, v5, v3
-; VI-SDAG-NEXT:    v_add_f32_e32 v2, v2, v3
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v1|, s6
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[0:1]
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], s10, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[0:1]
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
-; VI-SDAG-NEXT:    v_ldexp_f32 v2, s10, v2
-; VI-SDAG-NEXT:    v_log_f32_e32 v2, v2
-; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x411a209b
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v3, 0, v4, vcc
-; VI-SDAG-NEXT:    v_sub_f32_e32 v3, v1, v3
-; VI-SDAG-NEXT:    v_and_b32_e32 v1, 0xfffff000, v2
-; VI-SDAG-NEXT:    v_sub_f32_e32 v5, v2, v1
-; VI-SDAG-NEXT:    v_mul_f32_e32 v6, 0x3e9a2000, v5
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v0
+; VI-SDAG-NEXT:    s_and_b64 s[6:7], vcc, exec
+; VI-SDAG-NEXT:    s_cselect_b32 s6, 32, 0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s6
+; VI-SDAG-NEXT:    v_ldexp_f32 v3, s3, v3
+; VI-SDAG-NEXT:    v_log_f32_e32 v3, v3
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
+; VI-SDAG-NEXT:    s_and_b64 s[6:7], vcc, exec
+; VI-SDAG-NEXT:    v_and_b32_e32 v4, 0xfffff000, v3
+; VI-SDAG-NEXT:    v_sub_f32_e32 v5, v3, v4
+; VI-SDAG-NEXT:    v_mul_f32_e32 v6, 0x369a84fb, v4
+; VI-SDAG-NEXT:    v_mul_f32_e32 v7, 0x3e9a2000, v5
 ; VI-SDAG-NEXT:    v_mul_f32_e32 v5, 0x369a84fb, v5
-; VI-SDAG-NEXT:    v_mul_f32_e32 v7, 0x369a84fb, v1
-; VI-SDAG-NEXT:    v_add_f32_e32 v5, v7, v5
+; VI-SDAG-NEXT:    s_cselect_b32 s3, 32, 0
 ; VI-SDAG-NEXT:    v_add_f32_e32 v5, v6, v5
-; VI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3e9a2000, v1
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s9, v0
-; VI-SDAG-NEXT:    v_add_f32_e32 v1, v1, v5
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v5, 5, v5
-; VI-SDAG-NEXT:    v_ldexp_f32 v5, s9, v5
-; VI-SDAG-NEXT:    v_log_f32_e32 v5, v5
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[2:3], |v2|, s6
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v2, v1, s[2:3]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, v4, s[0:1]
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], s8, v0
-; VI-SDAG-NEXT:    v_sub_f32_e32 v2, v1, v2
-; VI-SDAG-NEXT:    v_and_b32_e32 v1, 0xfffff000, v5
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
-; VI-SDAG-NEXT:    v_sub_f32_e32 v6, v5, v1
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
-; VI-SDAG-NEXT:    v_mul_f32_e32 v7, 0x3e9a2000, v6
-; VI-SDAG-NEXT:    v_mul_f32_e32 v6, 0x369a84fb, v6
-; VI-SDAG-NEXT:    v_mul_f32_e32 v8, 0x369a84fb, v1
-; VI-SDAG-NEXT:    v_ldexp_f32 v0, s8, v0
-; VI-SDAG-NEXT:    v_add_f32_e32 v6, v8, v6
-; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
-; VI-SDAG-NEXT:    v_add_f32_e32 v6, v7, v6
-; VI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3e9a2000, v1
-; VI-SDAG-NEXT:    v_add_f32_e32 v1, v1, v6
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 s[2:3], |v5|, s6
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, v5, v1, s[2:3]
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v4, vcc
-; VI-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v5
-; VI-SDAG-NEXT:    v_and_b32_e32 v5, 0xfffff000, v0
-; VI-SDAG-NEXT:    v_sub_f32_e32 v6, v0, v5
-; VI-SDAG-NEXT:    v_mul_f32_e32 v7, 0x3e9a2000, v6
+; VI-SDAG-NEXT:    v_mov_b32_e32 v6, s3
+; VI-SDAG-NEXT:    v_ldexp_f32 v6, s2, v6
+; VI-SDAG-NEXT:    v_log_f32_e32 v6, v6
+; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3e9a2000, v4
+; VI-SDAG-NEXT:    v_add_f32_e32 v5, v7, v5
+; VI-SDAG-NEXT:    v_add_f32_e32 v4, v4, v5
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v1, vcc
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v3|, s8
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
+; VI-SDAG-NEXT:    v_sub_f32_e32 v3, v3, v2
+; VI-SDAG-NEXT:    v_and_b32_e32 v2, 0xfffff000, v6
+; VI-SDAG-NEXT:    v_sub_f32_e32 v4, v6, v2
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v0
+; VI-SDAG-NEXT:    v_mul_f32_e32 v7, 0x3e9a2000, v4
+; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x369a84fb, v4
+; VI-SDAG-NEXT:    v_mul_f32_e32 v8, 0x369a84fb, v2
+; VI-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; VI-SDAG-NEXT:    v_add_f32_e32 v4, v8, v4
+; VI-SDAG-NEXT:    s_cselect_b32 s2, 32, 0
+; VI-SDAG-NEXT:    v_add_f32_e32 v4, v7, v4
+; VI-SDAG-NEXT:    v_mov_b32_e32 v7, s2
+; VI-SDAG-NEXT:    v_ldexp_f32 v7, s1, v7
+; VI-SDAG-NEXT:    v_log_f32_e32 v7, v7
+; VI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3e9a2000, v2
+; VI-SDAG-NEXT:    v_add_f32_e32 v2, v2, v4
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v1, vcc
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v6|, s8
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
+; VI-SDAG-NEXT:    v_sub_f32_e32 v2, v2, v5
+; VI-SDAG-NEXT:    v_and_b32_e32 v5, 0xfffff000, v7
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v0
+; VI-SDAG-NEXT:    v_sub_f32_e32 v6, v7, v5
+; VI-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; VI-SDAG-NEXT:    v_mul_f32_e32 v8, 0x3e9a2000, v6
 ; VI-SDAG-NEXT:    v_mul_f32_e32 v6, 0x369a84fb, v6
-; VI-SDAG-NEXT:    v_mul_f32_e32 v8, 0x369a84fb, v5
+; VI-SDAG-NEXT:    v_mul_f32_e32 v9, 0x369a84fb, v5
+; VI-SDAG-NEXT:    s_cselect_b32 s1, 32, 0
+; VI-SDAG-NEXT:    v_add_f32_e32 v6, v9, v6
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-SDAG-NEXT:    v_add_f32_e32 v6, v8, v6
-; VI-SDAG-NEXT:    v_add_f32_e32 v6, v7, v6
 ; VI-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3e9a2000, v5
+; VI-SDAG-NEXT:    v_ldexp_f32 v1, s0, v1
 ; VI-SDAG-NEXT:    v_add_f32_e32 v5, v5, v6
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s6
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[0:1]
-; VI-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v4
+; VI-SDAG-NEXT:    v_log_f32_e32 v6, v1
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v7|, s8
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v7, v5, vcc
+; VI-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v4
+; VI-SDAG-NEXT:    v_and_b32_e32 v4, 0xfffff000, v6
+; VI-SDAG-NEXT:    v_sub_f32_e32 v5, v6, v4
+; VI-SDAG-NEXT:    v_mul_f32_e32 v7, 0x3e9a2000, v5
+; VI-SDAG-NEXT:    v_mul_f32_e32 v5, 0x369a84fb, v5
+; VI-SDAG-NEXT:    v_mul_f32_e32 v8, 0x369a84fb, v4
+; VI-SDAG-NEXT:    v_add_f32_e32 v5, v8, v5
+; VI-SDAG-NEXT:    v_add_f32_e32 v5, v7, v5
+; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3e9a2000, v4
+; VI-SDAG-NEXT:    v_add_f32_e32 v4, v4, v5
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v6|, s8
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
+; VI-SDAG-NEXT:    v_sub_f32_e32 v0, v4, v0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v4, s4
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s5
 ; VI-SDAG-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
@@ -1690,67 +1707,71 @@ define amdgpu_kernel void @s_log10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ;
 ; GFX900-SDAG-LABEL: s_log10_v4f32:
 ; GFX900-SDAG:       ; %bb.0:
-; GFX900-SDAG-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x34
+; GFX900-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
 ; GFX900-SDAG-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x24
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
-; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x3e9a209a
-; GFX900-SDAG-NEXT:    s_mov_b32 s5, 0x3284fbcf
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x411a209b
+; GFX900-SDAG-NEXT:    s_mov_b32 s8, 0x3e9a209a
 ; GFX900-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s11, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; GFX900-SDAG-NEXT:    v_ldexp_f32 v1, s11, v1
-; GFX900-SDAG-NEXT:    v_log_f32_e32 v1, v1
-; GFX900-SDAG-NEXT:    s_mov_b32 s11, 0x7f800000
-; GFX900-SDAG-NEXT:    v_mov_b32_e32 v5, 0x411a209b
-; GFX900-SDAG-NEXT:    v_mov_b32_e32 v4, 0
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3e9a209a, v1
-; GFX900-SDAG-NEXT:    v_fma_f32 v3, v1, s4, -v2
-; GFX900-SDAG-NEXT:    v_fma_f32 v3, v1, s5, v3
-; GFX900-SDAG-NEXT:    v_add_f32_e32 v2, v2, v3
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v1|, s11
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[0:1]
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], s10, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[0:1]
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
-; GFX900-SDAG-NEXT:    v_ldexp_f32 v2, s10, v2
-; GFX900-SDAG-NEXT:    v_log_f32_e32 v2, v2
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
-; GFX900-SDAG-NEXT:    v_sub_f32_e32 v3, v1, v3
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s9, v0
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3e9a209a, v2
-; GFX900-SDAG-NEXT:    v_fma_f32 v6, v2, s4, -v1
-; GFX900-SDAG-NEXT:    v_fma_f32 v6, v2, s5, v6
-; GFX900-SDAG-NEXT:    v_add_f32_e32 v1, v1, v6
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 5, v6
-; GFX900-SDAG-NEXT:    v_ldexp_f32 v6, s9, v6
-; GFX900-SDAG-NEXT:    v_log_f32_e32 v6, v6
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 s[2:3], |v2|, s11
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, v2, v1, s[2:3]
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, v5, s[0:1]
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 s[0:1], s8, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
-; GFX900-SDAG-NEXT:    v_sub_f32_e32 v2, v1, v2
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3e9a209a, v6
-; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, s8, v0
-; GFX900-SDAG-NEXT:    v_fma_f32 v7, v6, s4, -v1
-; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
-; GFX900-SDAG-NEXT:    v_fma_f32 v7, v6, s5, v7
-; GFX900-SDAG-NEXT:    v_add_f32_e32 v1, v1, v7
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 s[2:3], |v6|, s11
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, v6, v1, s[2:3]
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v6, 0, v5, vcc
-; GFX900-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v6
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v6, 0x3e9a209a, v0
-; GFX900-SDAG-NEXT:    v_fma_f32 v7, v0, s4, -v6
-; GFX900-SDAG-NEXT:    v_fma_f32 v7, v0, s5, v7
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v0
+; GFX900-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
+; GFX900-SDAG-NEXT:    s_cselect_b32 s4, 32, 0
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v3, s4
+; GFX900-SDAG-NEXT:    v_ldexp_f32 v3, s3, v3
+; GFX900-SDAG-NEXT:    v_log_f32_e32 v3, v3
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
+; GFX900-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3e9a209a, v3
+; GFX900-SDAG-NEXT:    s_cselect_b32 s3, 32, 0
+; GFX900-SDAG-NEXT:    s_mov_b32 s9, 0x3284fbcf
+; GFX900-SDAG-NEXT:    v_fma_f32 v6, v3, s8, -v5
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v7, s3
+; GFX900-SDAG-NEXT:    v_fma_f32 v6, v3, s9, v6
+; GFX900-SDAG-NEXT:    s_mov_b32 s10, 0x7f800000
+; GFX900-SDAG-NEXT:    v_ldexp_f32 v7, s2, v7
+; GFX900-SDAG-NEXT:    v_add_f32_e32 v5, v5, v6
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v6, 0, v1, vcc
+; GFX900-SDAG-NEXT:    v_log_f32_e32 v7, v7
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v3|, s10
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v0
+; GFX900-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; GFX900-SDAG-NEXT:    s_cselect_b32 s2, 32, 0
+; GFX900-SDAG-NEXT:    v_sub_f32_e32 v3, v3, v2
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3e9a209a, v7
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v8, s2
+; GFX900-SDAG-NEXT:    v_fma_f32 v5, v7, s8, -v2
+; GFX900-SDAG-NEXT:    v_ldexp_f32 v8, s1, v8
+; GFX900-SDAG-NEXT:    v_fma_f32 v5, v7, s9, v5
+; GFX900-SDAG-NEXT:    v_log_f32_e32 v8, v8
+; GFX900-SDAG-NEXT:    v_add_f32_e32 v2, v2, v5
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v1, vcc
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v7|, s10
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v2, v7, v2, vcc
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v0
+; GFX900-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; GFX900-SDAG-NEXT:    v_sub_f32_e32 v2, v2, v6
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v6, 0x3e9a209a, v8
+; GFX900-SDAG-NEXT:    s_cselect_b32 s1, 32, 0
+; GFX900-SDAG-NEXT:    v_fma_f32 v7, v8, s8, -v6
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; GFX900-SDAG-NEXT:    v_fma_f32 v7, v8, s9, v7
+; GFX900-SDAG-NEXT:    v_ldexp_f32 v1, s0, v1
 ; GFX900-SDAG-NEXT:    v_add_f32_e32 v6, v6, v7
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s11
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v5, 0, v5, s[0:1]
-; GFX900-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v5
+; GFX900-SDAG-NEXT:    v_log_f32_e32 v7, v1
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v8|, s10
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v1, v8, v6, vcc
+; GFX900-SDAG-NEXT:    v_sub_f32_e32 v1, v1, v5
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3e9a209a, v7
+; GFX900-SDAG-NEXT:    v_fma_f32 v6, v7, s8, -v5
+; GFX900-SDAG-NEXT:    v_fma_f32 v6, v7, s9, v6
+; GFX900-SDAG-NEXT:    v_add_f32_e32 v5, v5, v6
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v7|, s10
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v5, v7, v5, vcc
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v4, 0
+; GFX900-SDAG-NEXT:    v_sub_f32_e32 v0, v5, v0
 ; GFX900-SDAG-NEXT:    global_store_dwordx4 v4, v[0:3], s[6:7]
 ; GFX900-SDAG-NEXT:    s_endpgm
 ;
@@ -1824,68 +1845,61 @@ define amdgpu_kernel void @s_log10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x34
 ; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s8, 0x800000, s1
-; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s9, 0x800000, s0
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s6, 0x800000, s3
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s7, 0x800000, s2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s8
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s9
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s6
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s7
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 0x411a209b, s6
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v3, 5, v3
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v9, 0, 0x411a209b, s7
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v14, 0, 0x411a209b, s8
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v15, 0, 0x411a209b, s9
-; GFX1100-SDAG-NEXT:    v_ldexp_f32 v2, s1, v2
-; GFX1100-SDAG-NEXT:    v_ldexp_f32 v3, s0, v3
-; GFX1100-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s8, 0x800000, s1
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s9, 0x800000, s0
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4)
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 0x411a209b, s6
+; GFX1100-SDAG-NEXT:    s_and_b32 s6, s6, exec_lo
+; GFX1100-SDAG-NEXT:    s_cselect_b32 s6, 32, 0
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x411a209b, s7
+; GFX1100-SDAG-NEXT:    s_and_b32 s7, s7, exec_lo
+; GFX1100-SDAG-NEXT:    v_ldexp_f32 v2, s3, s6
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 0x411a209b, s8
+; GFX1100-SDAG-NEXT:    s_cselect_b32 s7, 32, 0
+; GFX1100-SDAG-NEXT:    s_and_b32 s8, s8, exec_lo
+; GFX1100-SDAG-NEXT:    s_cselect_b32 s3, 32, 0
+; GFX1100-SDAG-NEXT:    s_and_b32 s6, s9, exec_lo
+; GFX1100-SDAG-NEXT:    v_ldexp_f32 v3, s2, s7
+; GFX1100-SDAG-NEXT:    s_cselect_b32 s2, 32, 0
+; GFX1100-SDAG-NEXT:    v_ldexp_f32 v6, s1, s3
+; GFX1100-SDAG-NEXT:    v_ldexp_f32 v7, s0, s2
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v2, v2
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v3, v3
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1100-SDAG-NEXT:    v_log_f32_e32 v6, v6
+; GFX1100-SDAG-NEXT:    v_log_f32_e32 v7, v7
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v5, 0, 0x411a209b, s9
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(TRANS32_DEP_3)
+; GFX1100-SDAG-NEXT:    v_dual_mul_f32 v8, 0x3e9a209a, v2 :: v_dual_mul_f32 v9, 0x3e9a209a, v3
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2|
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v7, 0x3e9a209a, v2
-; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, s3, v0
-; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v8, 0x3e9a209a, v3
-; GFX1100-SDAG-NEXT:    v_ldexp_f32 v1, s2, v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1100-SDAG-NEXT:    v_fma_f32 v12, 0x3e9a209a, v2, -v7
-; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1100-SDAG-NEXT:    v_fma_f32 v13, 0x3e9a209a, v3, -v8
-; GFX1100-SDAG-NEXT:    v_log_f32_e32 v1, v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_dual_mul_f32 v10, 0x3e9a209a, v6 :: v_dual_mul_f32 v11, 0x3e9a209a, v7
+; GFX1100-SDAG-NEXT:    v_fma_f32 v12, 0x3e9a209a, v2, -v8
+; GFX1100-SDAG-NEXT:    v_fma_f32 v13, 0x3e9a209a, v3, -v9
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1100-SDAG-NEXT:    v_fma_f32 v14, 0x3e9a209a, v6, -v10
+; GFX1100-SDAG-NEXT:    v_fma_f32 v15, 0x3e9a209a, v7, -v11
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX1100-SDAG-NEXT:    v_dual_fmac_f32 v12, 0x3284fbcf, v2 :: v_dual_fmac_f32 v13, 0x3284fbcf, v3
-; GFX1100-SDAG-NEXT:    v_add_f32_e32 v7, v7, v12
-; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_dual_mul_f32 v5, 0x3e9a209a, v0 :: v_dual_add_f32 v8, v8, v13
-; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v6, 0x3e9a209a, v1
-; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1100-SDAG-NEXT:    v_fma_f32 v10, 0x3e9a209a, v0, -v5
-; GFX1100-SDAG-NEXT:    v_fma_f32 v11, 0x3e9a209a, v1, -v6
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_dual_fmac_f32 v10, 0x3284fbcf, v0 :: v_dual_fmac_f32 v11, 0x3284fbcf, v1
-; GFX1100-SDAG-NEXT:    v_dual_add_f32 v5, v5, v10 :: v_dual_add_f32 v6, v6, v11
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
-; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1|
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc_lo
-; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2|
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v5, v2, v7, vcc_lo
+; GFX1100-SDAG-NEXT:    v_dual_fmac_f32 v14, 0x3284fbcf, v6 :: v_dual_fmac_f32 v15, 0x3284fbcf, v7
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT:    v_dual_add_f32 v8, v8, v12 :: v_dual_add_f32 v9, v9, v13
+; GFX1100-SDAG-NEXT:    v_dual_add_f32 v10, v10, v14 :: v_dual_add_f32 v11, v11, v15
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v3|
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX1100-SDAG-NEXT:    v_dual_mov_b32 v7, 0 :: v_dual_sub_f32 v2, v1, v9
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v6, v3, v8, vcc_lo
-; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v3, v0, v4
-; GFX1100-SDAG-NEXT:    v_dual_sub_f32 v1, v5, v14 :: v_dual_sub_f32 v0, v6, v15
+; GFX1100-SDAG-NEXT:    v_dual_cndmask_b32 v8, v3, v9 :: v_dual_mov_b32 v9, 0
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v6|
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v6, v6, v10, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v7|
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v7, v7, v11, vcc_lo
+; GFX1100-SDAG-NEXT:    v_dual_sub_f32 v3, v2, v0 :: v_dual_sub_f32 v2, v8, v1
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX1100-SDAG-NEXT:    v_dual_sub_f32 v1, v6, v4 :: v_dual_sub_f32 v0, v7, v5
 ; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1100-SDAG-NEXT:    global_store_b128 v7, v[0:3], s[0:1]
+; GFX1100-SDAG-NEXT:    global_store_b128 v9, v[0:3], s[0:1]
 ; GFX1100-SDAG-NEXT:    s_endpgm
 ;
 ; GFX1100-GISEL-LABEL: s_log10_v4f32:
@@ -2143,8 +2157,7 @@ define float @v_log10_f32(float %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x3e9a209a
@@ -2189,8 +2202,7 @@ define float @v_log10_f32(float %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x7f800000
@@ -2241,8 +2253,7 @@ define float @v_log10_f32(float %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x3e9a209a
@@ -2286,22 +2297,21 @@ define float @v_log10_f32(float %in) {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3e9a209a, v0
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2347,8 +2357,7 @@ define float @v_log10_fabs_f32(float %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e64 v0, |v0|, v1
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x3e9a209a
@@ -2393,8 +2402,7 @@ define float @v_log10_fabs_f32(float %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, |v0|, v1
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x7f800000
@@ -2445,8 +2453,7 @@ define float @v_log10_fabs_f32(float %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, |v0|, v1
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x3e9a209a
@@ -2491,22 +2498,20 @@ define float @v_log10_fabs_f32(float %in) {
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s0
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, s0
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, |v0|, v1
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3e9a209a, v0
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x411a209b, s0
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2554,8 +2559,7 @@ define float @v_log10_fneg_fabs_f32(float %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x80800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc, |v0|, s4
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e64 v0, -|v0|, v1
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x3e9a209a
@@ -2600,8 +2604,7 @@ define float @v_log10_fneg_fabs_f32(float %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x80800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc, |v0|, s4
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, -|v0|, v1
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x7f800000
@@ -2652,8 +2655,7 @@ define float @v_log10_fneg_fabs_f32(float %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x80800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc, |v0|, s4
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, -|v0|, v1
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x3e9a209a
@@ -2698,22 +2700,20 @@ define float @v_log10_fneg_fabs_f32(float %in) {
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_lt_f32_e64 s0, 0x80800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s0
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, s0
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, -|v0|, v1
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3e9a209a, v0
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x411a209b, s0
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2762,8 +2762,7 @@ define float @v_log10_fneg_f32(float %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x80800000
 ; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e64 v0, -v0, v1
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x3e9a209a
@@ -2808,8 +2807,7 @@ define float @v_log10_fneg_f32(float %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x80800000
 ; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, -v0, v1
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x7f800000
@@ -2860,8 +2858,7 @@ define float @v_log10_fneg_f32(float %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x80800000
 ; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, -v0, v1
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x3e9a209a
@@ -2905,22 +2902,21 @@ define float @v_log10_fneg_f32(float %in) {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc_lo, 0x80800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, -v0, v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3e9a209a, v0
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2968,8 +2964,7 @@ define float @v_log10_f32_fast(float %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v2
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc11a209b
@@ -2995,8 +2990,7 @@ define float @v_log10_f32_fast(float %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc11a209b
@@ -3022,8 +3016,7 @@ define float @v_log10_f32_fast(float %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc11a209b
@@ -3048,12 +3041,10 @@ define float @v_log10_f32_fast(float %in) {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0xc11a209b, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3e9a209b, v1
@@ -3088,8 +3079,7 @@ define float @v_log10_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v2
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc11a209b
@@ -3115,8 +3105,7 @@ define float @v_log10_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc11a209b
@@ -3142,8 +3131,7 @@ define float @v_log10_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc11a209b
@@ -3168,12 +3156,10 @@ define float @v_log10_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0xc11a209b, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3e9a209b, v1
@@ -3208,8 +3194,7 @@ define float @v_log10_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true"
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v2
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc11a209b
@@ -3235,8 +3220,7 @@ define float @v_log10_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true"
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc11a209b
@@ -3262,8 +3246,7 @@ define float @v_log10_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true"
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc11a209b
@@ -3288,12 +3271,10 @@ define float @v_log10_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true"
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0xc11a209b, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3e9a209b, v1
@@ -3328,8 +3309,7 @@ define float @v_log10_f32_ninf(float %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x3e9a209a
@@ -3374,8 +3354,7 @@ define float @v_log10_f32_ninf(float %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x7f800000
@@ -3426,8 +3405,7 @@ define float @v_log10_f32_ninf(float %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x3e9a209a
@@ -3471,22 +3449,21 @@ define float @v_log10_f32_ninf(float %in) {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3e9a209a, v0
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -3532,8 +3509,7 @@ define float @v_log10_f32_afn(float %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v2
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc11a209b
@@ -3559,8 +3535,7 @@ define float @v_log10_f32_afn(float %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc11a209b
@@ -3586,8 +3561,7 @@ define float @v_log10_f32_afn(float %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc11a209b
@@ -3612,12 +3586,10 @@ define float @v_log10_f32_afn(float %in) {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0xc11a209b, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3e9a209b, v1
@@ -3681,8 +3653,7 @@ define float @v_log10_f32_afn_dynamic(float %in) #1 {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v2
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc11a209b
@@ -3708,8 +3679,7 @@ define float @v_log10_f32_afn_dynamic(float %in) #1 {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc11a209b
@@ -3735,8 +3705,7 @@ define float @v_log10_f32_afn_dynamic(float %in) #1 {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc11a209b
@@ -3761,12 +3730,10 @@ define float @v_log10_f32_afn_dynamic(float %in) #1 {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0xc11a209b, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3e9a209b, v1
@@ -3801,8 +3768,7 @@ define float @v_fabs_log10_f32_afn(float %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e64 v0, |v0|, v2
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc11a209b
@@ -3828,8 +3794,7 @@ define float @v_fabs_log10_f32_afn(float %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, |v0|, v2
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc11a209b
@@ -3855,8 +3820,7 @@ define float @v_fabs_log10_f32_afn(float %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, |v0|, v2
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0xc11a209b
@@ -3882,11 +3846,10 @@ define float @v_fabs_log10_f32_afn(float %in) {
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, s0
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0xc11a209b, s0
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, |v0|, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3e9a209b, v1
@@ -4063,8 +4026,7 @@ define float @v_log10_f32_nnan(float %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x3e9a209a
@@ -4109,8 +4071,7 @@ define float @v_log10_f32_nnan(float %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x7f800000
@@ -4161,8 +4122,7 @@ define float @v_log10_f32_nnan(float %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x3e9a209a
@@ -4206,22 +4166,21 @@ define float @v_log10_f32_nnan(float %in) {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3e9a209a, v0
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4407,8 +4366,7 @@ define float @v_log10_f32_nnan_dynamic(float %in) #1 {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x3e9a209a
@@ -4453,8 +4411,7 @@ define float @v_log10_f32_nnan_dynamic(float %in) #1 {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x7f800000
@@ -4505,8 +4462,7 @@ define float @v_log10_f32_nnan_dynamic(float %in) #1 {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x3e9a209a
@@ -4550,22 +4506,21 @@ define float @v_log10_f32_nnan_dynamic(float %in) #1 {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3e9a209a, v0
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4751,8 +4706,7 @@ define float @v_log10_f32_ninf_dynamic(float %in) #1 {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x3e9a209a
@@ -4797,8 +4751,7 @@ define float @v_log10_f32_ninf_dynamic(float %in) #1 {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x7f800000
@@ -4849,8 +4802,7 @@ define float @v_log10_f32_ninf_dynamic(float %in) #1 {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x3e9a209a
@@ -4894,22 +4846,21 @@ define float @v_log10_f32_ninf_dynamic(float %in) #1 {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3e9a209a, v0
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -4955,8 +4906,7 @@ define float @v_log10_f32_nnan_ninf(float %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x3e9a209a
@@ -4995,8 +4945,7 @@ define float @v_log10_f32_nnan_ninf(float %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_and_b32_e32 v1, 0xfffff000, v0
@@ -5041,8 +4990,7 @@ define float @v_log10_f32_nnan_ninf(float %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x3e9a209a
@@ -5080,20 +5028,18 @@ define float @v_log10_f32_nnan_ninf(float %in) {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3e9a209a, v0
-; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
 ; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3284fbcf, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v0, v1, v0
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -5240,8 +5186,7 @@ define float @v_log10_f32_nnan_ninf_dynamic(float %in) #1 {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x3e9a209a
@@ -5280,8 +5225,7 @@ define float @v_log10_f32_nnan_ninf_dynamic(float %in) #1 {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_and_b32_e32 v1, 0xfffff000, v0
@@ -5326,8 +5270,7 @@ define float @v_log10_f32_nnan_ninf_dynamic(float %in) #1 {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x3e9a209a
@@ -5365,20 +5308,18 @@ define float @v_log10_f32_nnan_ninf_dynamic(float %in) #1 {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3e9a209a, v0
-; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
 ; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3284fbcf, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v0, v1, v0
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -5451,8 +5392,7 @@ define float @v_log10_f32_dynamic_mode(float %in) #1 {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x3e9a209a
@@ -5497,8 +5437,7 @@ define float @v_log10_f32_dynamic_mode(float %in) #1 {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x7f800000
@@ -5549,8 +5488,7 @@ define float @v_log10_f32_dynamic_mode(float %in) #1 {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x3e9a209a
@@ -5594,22 +5532,21 @@ define float @v_log10_f32_dynamic_mode(float %in) #1 {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3e9a209a, v0
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -6132,8 +6069,7 @@ define float @v_log10_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
 ; SI-SDAG-NEXT:    s_mov_b32 s5, 0x3284fbcf
 ; SI-SDAG-NEXT:    v_add_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x3e9a209a
@@ -6302,8 +6238,7 @@ define float @v_log10_f32_from_fpext_bf16(bfloat %src) {
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SI-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SI-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-NEXT:    v_log_f32_e32 v0, v0
 ; SI-NEXT:    s_mov_b32 s4, 0x3e9a209a
@@ -6326,8 +6261,7 @@ define float @v_log10_f32_from_fpext_bf16(bfloat %src) {
 ; VI-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; VI-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; VI-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; VI-NEXT:    v_ldexp_f32 v0, v0, v1
 ; VI-NEXT:    v_log_f32_e32 v0, v0
 ; VI-NEXT:    s_mov_b32 s4, 0x7f800000
@@ -6353,8 +6287,7 @@ define float @v_log10_f32_from_fpext_bf16(bfloat %src) {
 ; GFX900-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX900-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; GFX900-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GFX900-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-NEXT:    s_mov_b32 s4, 0x3e9a209a
@@ -6377,22 +6310,20 @@ define float @v_log10_f32_from_fpext_bf16(bfloat %src) {
 ; GFX1100-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1100-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
-; GFX1100-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc_lo
 ; GFX1100-NEXT:    v_ldexp_f32 v0, v0, v1
+; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
 ; GFX1100-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-NEXT:    v_mul_f32_e32 v1, 0x3e9a209a, v0
 ; GFX1100-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
-; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
 ; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
 ; GFX1100-NEXT:    v_add_f32_e32 v1, v1, v2
+; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1100-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
 ; GFX1100-NEXT:    v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
-; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-NEXT:    v_sub_f32_e32 v0, v0, v1
 ; GFX1100-NEXT:    s_setpc_b64 s[30:31]
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll
index 8b3b79b0b1bdd..ebfc953a6bb96 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll
@@ -14,18 +14,19 @@
 define amdgpu_kernel void @s_log2_f32(ptr addrspace(1) %out, float %in) {
 ; SI-SDAG-LABEL: s_log2_f32:
 ; SI-SDAG:       ; %bb.0:
-; SI-SDAG-NEXT:    s_load_dword s2, s[4:5], 0xb
+; SI-SDAG-NEXT:    s_load_dword s6, s[4:5], 0xb
 ; SI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
-; SI-SDAG-NEXT:    s_mov_b32 s3, 0xf000
 ; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
+; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v0
+; SI-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; SI-SDAG-NEXT:    s_cselect_b32 s2, 32, 0
 ; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; SI-SDAG-NEXT:    v_ldexp_f32_e32 v1, s2, v1
+; SI-SDAG-NEXT:    v_mov_b32_e32 v1, s2
+; SI-SDAG-NEXT:    v_ldexp_f32_e32 v1, s6, v1
 ; SI-SDAG-NEXT:    v_log_f32_e32 v1, v1
+; SI-SDAG-NEXT:    s_mov_b32 s3, 0xf000
 ; SI-SDAG-NEXT:    s_mov_b32 s2, -1
 ; SI-SDAG-NEXT:    v_sub_f32_e32 v0, v1, v0
 ; SI-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], 0
@@ -52,16 +53,17 @@ define amdgpu_kernel void @s_log2_f32(ptr addrspace(1) %out, float %in) {
 ;
 ; VI-SDAG-LABEL: s_log2_f32:
 ; VI-SDAG:       ; %bb.0:
-; VI-SDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; VI-SDAG-NEXT:    s_load_dword s6, s[4:5], 0x2c
 ; VI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
 ; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
+; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v0
+; VI-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; VI-SDAG-NEXT:    s_cselect_b32 s2, 32, 0
 ; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; VI-SDAG-NEXT:    v_ldexp_f32 v1, s2, v1
+; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s2
+; VI-SDAG-NEXT:    v_ldexp_f32 v1, s6, v1
 ; VI-SDAG-NEXT:    v_log_f32_e32 v1, v1
 ; VI-SDAG-NEXT:    v_sub_f32_e32 v2, v1, v0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
@@ -90,17 +92,18 @@ define amdgpu_kernel void @s_log2_f32(ptr addrspace(1) %out, float %in) {
 ;
 ; GFX900-SDAG-LABEL: s_log2_f32:
 ; GFX900-SDAG:       ; %bb.0:
-; GFX900-SDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX900-SDAG-NEXT:    s_load_dword s6, s[4:5], 0x2c
 ; GFX900-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v0, 0x800000
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX900-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
+; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v0
+; GFX900-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; GFX900-SDAG-NEXT:    s_cselect_b32 s2, 32, 0
 ; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; GFX900-SDAG-NEXT:    v_ldexp_f32 v1, s2, v1
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, s2
+; GFX900-SDAG-NEXT:    v_ldexp_f32 v1, s6, v1
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v1, v1
 ; GFX900-SDAG-NEXT:    v_sub_f32_e32 v0, v1, v0
 ; GFX900-SDAG-NEXT:    global_store_dword v2, v0, s[0:1]
@@ -130,13 +133,12 @@ define amdgpu_kernel void @s_log2_f32(ptr addrspace(1) %out, float %in) {
 ; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x800000, s2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s0
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 0x42000000, s0
+; GFX1100-SDAG-NEXT:    s_and_b32 s0, s0, exec_lo
+; GFX1100-SDAG-NEXT:    s_cselect_b32 s3, 32, 0
 ; GFX1100-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_ldexp_f32 v1, s2, v1
+; GFX1100-SDAG-NEXT:    v_ldexp_f32 v1, s2, s3
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v1, v1
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v1, v0
@@ -221,14 +223,16 @@ define amdgpu_kernel void @s_log2_v2f32(ptr addrspace(1) %out, <2 x float> %in)
 ; SI-SDAG-NEXT:    s_mov_b32 s6, -1
 ; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v0
+; SI-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
 ; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; SI-SDAG-NEXT:    s_cselect_b32 s4, 32, 0
 ; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v3, 5, v3
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SI-SDAG-NEXT:    v_mov_b32_e32 v3, s4
+; SI-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v3, s3, v3
+; SI-SDAG-NEXT:    s_cselect_b32 s3, 32, 0
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; SI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v1, s2, v1
 ; SI-SDAG-NEXT:    v_log_f32_e32 v3, v3
 ; SI-SDAG-NEXT:    v_log_f32_e32 v4, v1
@@ -271,14 +275,16 @@ define amdgpu_kernel void @s_log2_v2f32(ptr addrspace(1) %out, <2 x float> %in)
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
 ; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v0
+; VI-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
 ; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; VI-SDAG-NEXT:    s_cselect_b32 s4, 32, 0
 ; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v3, 5, v3
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s4
+; VI-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
 ; VI-SDAG-NEXT:    v_ldexp_f32 v3, s3, v3
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; VI-SDAG-NEXT:    s_cselect_b32 s3, 32, 0
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
 ; VI-SDAG-NEXT:    v_log_f32_e32 v3, v3
 ; VI-SDAG-NEXT:    v_ldexp_f32 v1, s2, v1
 ; VI-SDAG-NEXT:    v_log_f32_e32 v4, v1
@@ -322,14 +328,16 @@ define amdgpu_kernel void @s_log2_v2f32(ptr addrspace(1) %out, <2 x float> %in)
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v5, 0
 ; GFX900-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v0
+; GFX900-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
 ; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; GFX900-SDAG-NEXT:    s_cselect_b32 s4, 32, 0
 ; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v3, 5, v3
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v3, s4
+; GFX900-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v3, s3, v3
+; GFX900-SDAG-NEXT:    s_cselect_b32 s3, 32, 0
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v1, s2, v1
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v3, v3
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v4, v1
@@ -365,26 +373,24 @@ define amdgpu_kernel void @s_log2_v2f32(ptr addrspace(1) %out, <2 x float> %in)
 ; GFX1100-SDAG-LABEL: s_log2_v2f32:
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s4, 0x800000, s3
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s5, 0x800000, s2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s4
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s5
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 0x42000000, s4
+; GFX1100-SDAG-NEXT:    s_and_b32 s4, s4, exec_lo
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 0x42000000, s5
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v3, 5, v3
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1100-SDAG-NEXT:    v_ldexp_f32 v1, s3, v1
-; GFX1100-SDAG-NEXT:    v_ldexp_f32 v3, s2, v3
+; GFX1100-SDAG-NEXT:    s_cselect_b32 s4, 32, 0
+; GFX1100-SDAG-NEXT:    s_and_b32 s5, s5, exec_lo
+; GFX1100-SDAG-NEXT:    s_cselect_b32 s5, 32, 0
+; GFX1100-SDAG-NEXT:    v_ldexp_f32 v1, s3, s4
+; GFX1100-SDAG-NEXT:    v_ldexp_f32 v3, s2, s5
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v1, v1
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v3, v3
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_dual_sub_f32 v1, v1, v0 :: v_dual_sub_f32 v0, v3, v2
+; GFX1100-SDAG-NEXT:    v_dual_mov_b32 v4, 0 :: v_dual_sub_f32 v1, v1, v0
+; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v3, v2
 ; GFX1100-SDAG-NEXT:    global_store_b64 v4, v[0:1], s[0:1]
 ; GFX1100-SDAG-NEXT:    s_endpgm
 ;
@@ -488,20 +494,23 @@ define amdgpu_kernel void @s_log2_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v0
+; SI-SDAG-NEXT:    s_and_b64 s[8:9], vcc, exec
 ; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; SI-SDAG-NEXT:    s_cselect_b32 s3, 32, 0
 ; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v3, s3
+; SI-SDAG-NEXT:    s_and_b64 s[8:9], vcc, exec
+; SI-SDAG-NEXT:    v_ldexp_f32_e32 v3, s1, v3
+; SI-SDAG-NEXT:    s_cselect_b32 s1, 32, 0
 ; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v1, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; SI-SDAG-NEXT:    v_mov_b32_e32 v5, s1
 ; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v3, 5, v3
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
-; SI-SDAG-NEXT:    v_ldexp_f32_e32 v3, s1, v3
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v5, 5, v5
+; SI-SDAG-NEXT:    v_ldexp_f32_e32 v5, s0, v5
+; SI-SDAG-NEXT:    s_and_b64 s[0:1], vcc, exec
+; SI-SDAG-NEXT:    s_cselect_b32 s0, 32, 0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, s2, v0
 ; SI-SDAG-NEXT:    v_log_f32_e32 v3, v3
-; SI-SDAG-NEXT:    v_ldexp_f32_e32 v5, s0, v5
 ; SI-SDAG-NEXT:    v_log_f32_e32 v7, v0
 ; SI-SDAG-NEXT:    v_log_f32_e32 v5, v5
 ; SI-SDAG-NEXT:    v_cndmask_b32_e32 v6, 0, v1, vcc
@@ -555,19 +564,22 @@ define amdgpu_kernel void @s_log2_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
 ; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
+; VI-SDAG-NEXT:    s_and_b64 s[6:7], vcc, exec
+; VI-SDAG-NEXT:    s_cselect_b32 s3, 32, 0
 ; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s3
 ; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v0
+; VI-SDAG-NEXT:    v_ldexp_f32 v3, s2, v3
+; VI-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
 ; VI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; VI-SDAG-NEXT:    s_cselect_b32 s2, 32, 0
 ; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v0
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v3, 5, v3
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v5, 5, v5
-; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-SDAG-NEXT:    v_ldexp_f32 v3, s2, v3
+; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s2
+; VI-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
 ; VI-SDAG-NEXT:    v_ldexp_f32 v5, s1, v5
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; VI-SDAG-NEXT:    s_cselect_b32 s1, 32, 0
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-SDAG-NEXT:    v_log_f32_e32 v3, v3
 ; VI-SDAG-NEXT:    v_log_f32_e32 v5, v5
 ; VI-SDAG-NEXT:    v_ldexp_f32 v1, s0, v1
@@ -622,19 +634,22 @@ define amdgpu_kernel void @s_log2_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v7, 0
 ; GFX900-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
+; GFX900-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
+; GFX900-SDAG-NEXT:    s_cselect_b32 s3, 32, 0
 ; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v3, s3
 ; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v0
+; GFX900-SDAG-NEXT:    v_ldexp_f32 v3, s2, v3
+; GFX900-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
 ; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v1, vcc
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; GFX900-SDAG-NEXT:    s_cselect_b32 s2, 32, 0
 ; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v3, 5, v3
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v5, 5, v5
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; GFX900-SDAG-NEXT:    v_ldexp_f32 v3, s2, v3
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v5, s2
+; GFX900-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v5, s1, v5
+; GFX900-SDAG-NEXT:    s_cselect_b32 s1, 32, 0
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v1, s0, v1
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v3, v3
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v5, v5
@@ -682,35 +697,31 @@ define amdgpu_kernel void @s_log2_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ; GFX1100-SDAG-NEXT:    s_clause 0x1
 ; GFX1100-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x34
 ; GFX1100-SDAG-NEXT:    s_load_b64 s[4:5], s[4:5], 0x24
-; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s6, 0x800000, s1
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s3, 0x800000, s2
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s6, 0x800000, s1
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s7, 0x800000, s0
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s6
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s3
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s7
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, s6
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 0x42000000, s3
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v4, 5, v4
+; GFX1100-SDAG-NEXT:    s_and_b32 s3, s3, exec_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, s6
+; GFX1100-SDAG-NEXT:    s_cselect_b32 s3, 32, 0
+; GFX1100-SDAG-NEXT:    s_and_b32 s6, s6, exec_lo
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, 0x42000000, s7
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v5, 5, v5
+; GFX1100-SDAG-NEXT:    s_cselect_b32 s6, 32, 0
+; GFX1100-SDAG-NEXT:    s_and_b32 s7, s7, exec_lo
+; GFX1100-SDAG-NEXT:    v_ldexp_f32 v2, s2, s3
+; GFX1100-SDAG-NEXT:    s_cselect_b32 s2, 32, 0
+; GFX1100-SDAG-NEXT:    v_ldexp_f32 v4, s1, s6
+; GFX1100-SDAG-NEXT:    v_ldexp_f32 v5, s0, s2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1100-SDAG-NEXT:    v_ldexp_f32 v4, s1, v4
-; GFX1100-SDAG-NEXT:    v_ldexp_f32 v5, s0, v5
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT:    v_log_f32_e32 v2, v2
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v4, v4
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
-; GFX1100-SDAG-NEXT:    v_log_f32_e32 v5, v5
-; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v1, v4, v1
-; GFX1100-SDAG-NEXT:    v_ldexp_f32 v2, s2, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_log_f32_e32 v2, v2
+; GFX1100-SDAG-NEXT:    v_log_f32_e32 v5, v5
+; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v2, v2, v0
+; GFX1100-SDAG-NEXT:    v_dual_sub_f32 v2, v2, v0 :: v_dual_sub_f32 v1, v4, v1
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v5, v3
 ; GFX1100-SDAG-NEXT:    global_store_b96 v6, v[0:2], s[4:5]
 ; GFX1100-SDAG-NEXT:    s_endpgm
@@ -852,24 +863,28 @@ define amdgpu_kernel void @s_log2_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; SI-SDAG-NEXT:    s_mov_b32 s2, -1
 ; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s7, v0
+; SI-SDAG-NEXT:    s_and_b64 s[8:9], vcc, exec
 ; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; SI-SDAG-NEXT:    s_cselect_b32 s8, 32, 0
 ; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v3, s8
+; SI-SDAG-NEXT:    s_and_b64 s[8:9], vcc, exec
+; SI-SDAG-NEXT:    v_ldexp_f32_e32 v3, s7, v3
+; SI-SDAG-NEXT:    s_cselect_b32 s7, 32, 0
 ; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v1, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; SI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
 ; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s5, v0
+; SI-SDAG-NEXT:    v_ldexp_f32_e32 v5, s6, v5
+; SI-SDAG-NEXT:    s_and_b64 s[6:7], vcc, exec
 ; SI-SDAG-NEXT:    v_cndmask_b32_e32 v6, 0, v1, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; SI-SDAG-NEXT:    s_cselect_b32 s6, 32, 0
 ; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v3, 5, v3
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v5, 5, v5
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v7, 5, v7
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; SI-SDAG-NEXT:    v_ldexp_f32_e32 v3, s7, v3
-; SI-SDAG-NEXT:    v_ldexp_f32_e32 v5, s6, v5
+; SI-SDAG-NEXT:    v_mov_b32_e32 v7, s6
+; SI-SDAG-NEXT:    s_and_b64 s[6:7], vcc, exec
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v7, s5, v7
+; SI-SDAG-NEXT:    s_cselect_b32 s5, 32, 0
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; SI-SDAG-NEXT:    v_mov_b32_e32 v1, s5
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v1, s4, v1
 ; SI-SDAG-NEXT:    v_log_f32_e32 v3, v3
 ; SI-SDAG-NEXT:    v_log_f32_e32 v5, v5
@@ -930,26 +945,30 @@ define amdgpu_kernel void @s_log2_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
 ; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v0
+; VI-SDAG-NEXT:    s_and_b64 s[6:7], vcc, exec
 ; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; VI-SDAG-NEXT:    s_cselect_b32 s6, 32, 0
 ; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s6
+; VI-SDAG-NEXT:    s_and_b64 s[6:7], vcc, exec
+; VI-SDAG-NEXT:    v_ldexp_f32 v3, s3, v3
+; VI-SDAG-NEXT:    s_cselect_b32 s3, 32, 0
 ; VI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s3
 ; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v0
+; VI-SDAG-NEXT:    v_ldexp_f32 v5, s2, v5
+; VI-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
 ; VI-SDAG-NEXT:    v_cndmask_b32_e32 v6, 0, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; VI-SDAG-NEXT:    s_cselect_b32 s2, 32, 0
 ; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v0
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v3, 5, v3
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v5, 5, v5
+; VI-SDAG-NEXT:    v_mov_b32_e32 v7, s2
+; VI-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; VI-SDAG-NEXT:    v_ldexp_f32 v7, s1, v7
+; VI-SDAG-NEXT:    s_cselect_b32 s1, 32, 0
 ; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-SDAG-NEXT:    v_ldexp_f32 v3, s3, v3
-; VI-SDAG-NEXT:    v_ldexp_f32 v5, s2, v5
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v7, 5, v7
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-SDAG-NEXT:    v_log_f32_e32 v3, v3
 ; VI-SDAG-NEXT:    v_log_f32_e32 v5, v5
-; VI-SDAG-NEXT:    v_ldexp_f32 v7, s1, v7
 ; VI-SDAG-NEXT:    v_ldexp_f32 v1, s0, v1
 ; VI-SDAG-NEXT:    v_log_f32_e32 v7, v7
 ; VI-SDAG-NEXT:    v_log_f32_e32 v8, v1
@@ -1011,24 +1030,28 @@ define amdgpu_kernel void @s_log2_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX900-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s3, v0
+; GFX900-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
 ; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; GFX900-SDAG-NEXT:    s_cselect_b32 s4, 32, 0
 ; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v3, s4
+; GFX900-SDAG-NEXT:    s_and_b64 s[4:5], vcc, exec
+; GFX900-SDAG-NEXT:    v_ldexp_f32 v3, s3, v3
+; GFX900-SDAG-NEXT:    s_cselect_b32 s3, 32, 0
 ; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v1, vcc
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v6, s3
 ; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s1, v0
+; GFX900-SDAG-NEXT:    v_ldexp_f32 v6, s2, v6
+; GFX900-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
 ; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v7, 0, v1, vcc
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GFX900-SDAG-NEXT:    s_cselect_b32 s2, 32, 0
 ; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v3, 5, v3
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 5, v6
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v8, 5, v8
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
-; GFX900-SDAG-NEXT:    v_ldexp_f32 v3, s3, v3
-; GFX900-SDAG-NEXT:    v_ldexp_f32 v6, s2, v6
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v8, s2
+; GFX900-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v8, s1, v8
+; GFX900-SDAG-NEXT:    s_cselect_b32 s1, 32, 0
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v1, s0, v1
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v3, v3
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v6, v6
@@ -1085,42 +1108,37 @@ define amdgpu_kernel void @s_log2_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; GFX1100-SDAG-NEXT:    s_clause 0x1
 ; GFX1100-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x34
 ; GFX1100-SDAG-NEXT:    s_load_b64 s[4:5], s[4:5], 0x24
-; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v9, 0
 ; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s6, 0x800000, s3
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s7, 0x800000, s2
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s8, 0x800000, s1
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s9, 0x800000, s0
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s6
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s7
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, s7
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s8
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s9
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v3, 5, v3
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4)
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 0x42000000, s6
+; GFX1100-SDAG-NEXT:    s_and_b32 s6, s6, exec_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, s7
+; GFX1100-SDAG-NEXT:    s_cselect_b32 s6, 32, 0
+; GFX1100-SDAG-NEXT:    s_and_b32 s7, s7, exec_lo
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 0x42000000, s8
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v7, 5, v7
-; GFX1100-SDAG-NEXT:    v_ldexp_f32 v2, s3, v2
-; GFX1100-SDAG-NEXT:    v_ldexp_f32 v3, s2, v3
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v5, 0, 0x42000000, s9
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1100-SDAG-NEXT:    v_ldexp_f32 v7, s0, v7
+; GFX1100-SDAG-NEXT:    s_cselect_b32 s7, 32, 0
+; GFX1100-SDAG-NEXT:    s_and_b32 s8, s8, exec_lo
+; GFX1100-SDAG-NEXT:    v_ldexp_f32 v2, s3, s6
+; GFX1100-SDAG-NEXT:    s_cselect_b32 s3, 32, 0
+; GFX1100-SDAG-NEXT:    s_and_b32 s6, s9, exec_lo
+; GFX1100-SDAG-NEXT:    v_ldexp_f32 v3, s2, s7
+; GFX1100-SDAG-NEXT:    s_cselect_b32 s2, 32, 0
+; GFX1100-SDAG-NEXT:    v_ldexp_f32 v6, s1, s3
+; GFX1100-SDAG-NEXT:    v_ldexp_f32 v7, s0, s2
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v2, v2
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 5, v6
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v8, v3
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(TRANS32_DEP_3)
-; GFX1100-SDAG-NEXT:    v_log_f32_e32 v7, v7
-; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v3, v2, v0
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX1100-SDAG-NEXT:    v_ldexp_f32 v6, s1, v6
-; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v2, v8, v1
-; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v7, v5
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v5, 0, 0x42000000, s9
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v6, v6
+; GFX1100-SDAG-NEXT:    v_log_f32_e32 v7, v7
+; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v9, 0
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(TRANS32_DEP_3)
+; GFX1100-SDAG-NEXT:    v_dual_sub_f32 v3, v2, v0 :: v_dual_sub_f32 v2, v8, v1
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v1, v6, v4
+; GFX1100-SDAG-NEXT:    v_dual_sub_f32 v1, v6, v4 :: v_dual_sub_f32 v0, v7, v5
 ; GFX1100-SDAG-NEXT:    global_store_b128 v9, v[0:3], s[4:5]
 ; GFX1100-SDAG-NEXT:    s_endpgm
 ;
@@ -1272,8 +1290,7 @@ define float @v_log2_f32(float %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v2
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -1300,8 +1317,7 @@ define float @v_log2_f32(float %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -1328,8 +1344,7 @@ define float @v_log2_f32(float %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -1355,12 +1370,10 @@ define float @v_log2_f32(float %in) {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
@@ -1400,8 +1413,7 @@ define float @v_log2_fabs_f32(float %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e64 v0, |v0|, v2
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -1428,8 +1440,7 @@ define float @v_log2_fabs_f32(float %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, |v0|, v2
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -1456,8 +1467,7 @@ define float @v_log2_fabs_f32(float %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, |v0|, v2
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -1484,11 +1494,10 @@ define float @v_log2_fabs_f32(float %in) {
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, s0
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, s0
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, |v0|, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
@@ -1529,8 +1538,7 @@ define float @v_log2_fneg_fabs_f32(float %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x80800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc, |v0|, s4
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e64 v0, -|v0|, v2
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -1557,8 +1565,7 @@ define float @v_log2_fneg_fabs_f32(float %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x80800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc, |v0|, s4
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, -|v0|, v2
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -1585,8 +1592,7 @@ define float @v_log2_fneg_fabs_f32(float %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x80800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc, |v0|, s4
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, -|v0|, v2
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -1613,11 +1619,10 @@ define float @v_log2_fneg_fabs_f32(float %in) {
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_lt_f32_e64 s0, 0x80800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, s0
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, s0
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, -|v0|, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
@@ -1659,8 +1664,7 @@ define float @v_log2_fneg_f32(float %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x80800000
 ; SI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e64 v0, -v0, v2
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -1687,8 +1691,7 @@ define float @v_log2_fneg_f32(float %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x80800000
 ; VI-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, -v0, v2
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -1715,8 +1718,7 @@ define float @v_log2_fneg_f32(float %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x80800000
 ; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, -v0, v2
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -1742,12 +1744,10 @@ define float @v_log2_fneg_f32(float %in) {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_lt_f32_e32 vcc_lo, 0x80800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, -v0, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
@@ -1788,8 +1788,7 @@ define float @v_log2_f32_fast(float %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v2
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -1816,8 +1815,7 @@ define float @v_log2_f32_fast(float %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -1844,8 +1842,7 @@ define float @v_log2_f32_fast(float %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -1871,12 +1868,10 @@ define float @v_log2_f32_fast(float %in) {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
@@ -1916,8 +1911,7 @@ define float @v_log2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v2
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -1944,8 +1938,7 @@ define float @v_log2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -1972,8 +1965,7 @@ define float @v_log2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -1999,12 +1991,10 @@ define float @v_log2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
@@ -2044,8 +2034,7 @@ define float @v_log2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true"
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v2
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -2072,8 +2061,7 @@ define float @v_log2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true"
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -2100,8 +2088,7 @@ define float @v_log2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true"
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -2127,12 +2114,10 @@ define float @v_log2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true"
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
@@ -2172,8 +2157,7 @@ define float @v_log2_f32_ninf(float %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v2
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -2200,8 +2184,7 @@ define float @v_log2_f32_ninf(float %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -2228,8 +2211,7 @@ define float @v_log2_f32_ninf(float %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -2255,12 +2237,10 @@ define float @v_log2_f32_ninf(float %in) {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
@@ -2300,8 +2280,7 @@ define float @v_log2_f32_afn(float %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v2
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -2328,8 +2307,7 @@ define float @v_log2_f32_afn(float %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -2356,8 +2334,7 @@ define float @v_log2_f32_afn(float %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -2383,12 +2360,10 @@ define float @v_log2_f32_afn(float %in) {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
@@ -2454,8 +2429,7 @@ define float @v_log2_f32_afn_dynamic(float %in) #1 {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v2
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -2482,8 +2456,7 @@ define float @v_log2_f32_afn_dynamic(float %in) #1 {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -2510,8 +2483,7 @@ define float @v_log2_f32_afn_dynamic(float %in) #1 {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -2537,12 +2509,10 @@ define float @v_log2_f32_afn_dynamic(float %in) #1 {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
@@ -2582,8 +2552,7 @@ define float @v_fabs_log2_f32_afn(float %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e64 v0, |v0|, v2
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -2610,8 +2579,7 @@ define float @v_fabs_log2_f32_afn(float %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, |v0|, v2
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -2638,8 +2606,7 @@ define float @v_fabs_log2_f32_afn(float %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, |v0|, v2
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -2666,11 +2633,10 @@ define float @v_fabs_log2_f32_afn(float %in) {
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, s0
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, s0
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, |v0|, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
@@ -2737,8 +2703,7 @@ define float @v_log2_f32_nnan(float %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v2
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -2765,8 +2730,7 @@ define float @v_log2_f32_nnan(float %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -2793,8 +2757,7 @@ define float @v_log2_f32_nnan(float %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -2820,12 +2783,10 @@ define float @v_log2_f32_nnan(float %in) {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
@@ -2891,8 +2852,7 @@ define float @v_log2_f32_nnan_dynamic(float %in) #1 {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v2
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -2919,8 +2879,7 @@ define float @v_log2_f32_nnan_dynamic(float %in) #1 {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -2947,8 +2906,7 @@ define float @v_log2_f32_nnan_dynamic(float %in) #1 {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -2974,12 +2932,10 @@ define float @v_log2_f32_nnan_dynamic(float %in) #1 {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
@@ -3045,8 +3001,7 @@ define float @v_log2_f32_ninf_dynamic(float %in) #1 {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v2
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -3073,8 +3028,7 @@ define float @v_log2_f32_ninf_dynamic(float %in) #1 {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -3101,8 +3055,7 @@ define float @v_log2_f32_ninf_dynamic(float %in) #1 {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -3128,12 +3081,10 @@ define float @v_log2_f32_ninf_dynamic(float %in) #1 {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
@@ -3173,8 +3124,7 @@ define float @v_log2_f32_nnan_ninf(float %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v2
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -3201,8 +3151,7 @@ define float @v_log2_f32_nnan_ninf(float %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -3229,8 +3178,7 @@ define float @v_log2_f32_nnan_ninf(float %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -3256,12 +3204,10 @@ define float @v_log2_f32_nnan_ninf(float %in) {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
@@ -3327,8 +3273,7 @@ define float @v_log2_f32_nnan_ninf_dynamic(float %in) #1 {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v2
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -3355,8 +3300,7 @@ define float @v_log2_f32_nnan_ninf_dynamic(float %in) #1 {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -3383,8 +3327,7 @@ define float @v_log2_f32_nnan_ninf_dynamic(float %in) #1 {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -3410,12 +3353,10 @@ define float @v_log2_f32_nnan_ninf_dynamic(float %in) #1 {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
@@ -3481,8 +3422,7 @@ define float @v_log2_f32_dynamic_mode(float %in) #1 {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v2
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -3509,8 +3449,7 @@ define float @v_log2_f32_dynamic_mode(float %in) #1 {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; VI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -3537,8 +3476,7 @@ define float @v_log2_f32_dynamic_mode(float %in) #1 {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX900-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GFX900-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GFX900-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
 ; GFX900-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -3564,12 +3502,10 @@ define float @v_log2_f32_dynamic_mode(float %in) #1 {
 ; GFX1100-SDAG:       ; %bb.0:
 ; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
 ; GFX1100-SDAG-NEXT:    v_ldexp_f32 v0, v0, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
@@ -3738,8 +3674,7 @@ define float @v_log2_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-SDAG-NEXT:    v_add_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -3806,8 +3741,7 @@ define float @v_log2_f32_from_fpext_bf16(bfloat %src) {
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-NEXT:    s_mov_b32 s4, 0x800000
 ; SI-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; SI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; SI-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; SI-NEXT:    v_ldexp_f32_e32 v0, v0, v2
 ; SI-NEXT:    v_log_f32_e32 v0, v0
 ; SI-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -3821,8 +3755,7 @@ define float @v_log2_f32_from_fpext_bf16(bfloat %src) {
 ; VI-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; VI-NEXT:    s_mov_b32 s4, 0x800000
 ; VI-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; VI-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; VI-NEXT:    v_lshlrev_b32_e32 v1, 5, v1
+; VI-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc
 ; VI-NEXT:    v_ldexp_f32 v0, v0, v1
 ; VI-NEXT:    v_log_f32_e32 v0, v0
 ; VI-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -3836,8 +3769,7 @@ define float @v_log2_f32_from_fpext_bf16(bfloat %src) {
 ; GFX900-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX900-NEXT:    s_mov_b32 s4, 0x800000
 ; GFX900-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
-; GFX900-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX900-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
+; GFX900-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc
 ; GFX900-NEXT:    v_ldexp_f32 v0, v0, v2
 ; GFX900-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-NEXT:    v_mov_b32_e32 v1, 0x42000000
@@ -3851,11 +3783,10 @@ define float @v_log2_f32_from_fpext_bf16(bfloat %src) {
 ; GFX1100-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
 ; GFX1100-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX1100-NEXT:    v_cndmask_b32_e64 v2, 0, 32, vcc_lo
 ; GFX1100-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
-; GFX1100-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
-; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-NEXT:    v_ldexp_f32 v0, v0, v2
+; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-NEXT:    v_sub_f32_e32 v0, v0, v1
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll b/llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll
index 33e34e38a1837..0ac68c13d2703 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll
@@ -1324,53 +1324,21 @@ define void @v_set_rounding_select_1_3(i32 %cond) {
 }
 
 define amdgpu_gfx void @s_set_rounding_select_2_0(i32 inreg %cond) {
-; GFX6-LABEL: s_set_rounding_select_2_0:
-; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    s_cmp_eq_u32 s4, 0
-; GFX6-NEXT:    s_cselect_b64 s[34:35], -1, 0
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[34:35]
-; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
-; GFX6-NEXT:    v_lshr_b32_e32 v0, 0xa50f, v0
-; GFX6-NEXT:    v_readfirstlane_b32 s34, v0
-; GFX6-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX6-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX7-LABEL: s_set_rounding_select_2_0:
-; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    s_cmp_eq_u32 s4, 0
-; GFX7-NEXT:    s_cselect_b64 s[34:35], -1, 0
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[34:35]
-; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
-; GFX7-NEXT:    v_lshr_b32_e32 v0, 0xa50f, v0
-; GFX7-NEXT:    v_readfirstlane_b32 s34, v0
-; GFX7-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX7-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: s_set_rounding_select_2_0:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_cmp_eq_u32 s4, 0
-; GFX8-NEXT:    s_cselect_b64 s[34:35], -1, 0
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[34:35]
-; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
-; GFX8-NEXT:    s_mov_b32 s34, 0xa50f
-; GFX8-NEXT:    v_lshrrev_b32_e64 v0, v0, s34
-; GFX8-NEXT:    v_readfirstlane_b32 s34, v0
-; GFX8-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
-; GFX8-NEXT:    s_setpc_b64 s[30:31]
+; GFX678-LABEL: s_set_rounding_select_2_0:
+; GFX678:       ; %bb.0:
+; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX678-NEXT:    s_cmp_eq_u32 s4, 0
+; GFX678-NEXT:    s_movk_i32 s34, 0xa5
+; GFX678-NEXT:    s_cselect_b32 s34, s34, 0xa50f
+; GFX678-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
+; GFX678-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: s_set_rounding_select_2_0:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    s_cmp_eq_u32 s4, 0
-; GFX9-NEXT:    s_cselect_b64 s[34:35], -1, 0
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[34:35]
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
-; GFX9-NEXT:    s_mov_b32 s34, 0xa50f
-; GFX9-NEXT:    v_lshrrev_b32_e64 v0, v0, s34
-; GFX9-NEXT:    v_readfirstlane_b32 s34, v0
+; GFX9-NEXT:    s_movk_i32 s34, 0xa5
+; GFX9-NEXT:    s_cselect_b32 s34, s34, 0xa50f
 ; GFX9-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1378,11 +1346,8 @@ define amdgpu_gfx void @s_set_rounding_select_2_0(i32 inreg %cond) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_cmp_eq_u32 s4, 0
-; GFX10-NEXT:    s_cselect_b32 s34, -1, 0
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s34
-; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
-; GFX10-NEXT:    v_lshrrev_b32_e64 v0, v0, 0xa50f
-; GFX10-NEXT:    v_readfirstlane_b32 s34, v0
+; GFX10-NEXT:    s_movk_i32 s34, 0xa5
+; GFX10-NEXT:    s_cselect_b32 s34, s34, 0xa50f
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1390,11 +1355,8 @@ define amdgpu_gfx void @s_set_rounding_select_2_0(i32 inreg %cond) {
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    s_cmp_eq_u32 s4, 0
-; GFX11-NEXT:    s_cselect_b32 s0, -1, 0
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
-; GFX11-NEXT:    v_lshrrev_b32_e64 v0, v0, 0xa50f
-; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX11-NEXT:    s_movk_i32 s0, 0xa5
+; GFX11-NEXT:    s_cselect_b32 s0, s0, 0xa50f
 ; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %cond, 0
@@ -1530,10 +1492,7 @@ define amdgpu_gfx void @s_set_rounding_select_4_0(i32 inreg %cond) {
 ; GFX678:       ; %bb.0:
 ; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX678-NEXT:    s_cmp_eq_u32 s4, 0
-; GFX678-NEXT:    s_cselect_b64 s[34:35], -1, 0
-; GFX678-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[34:35]
-; GFX678-NEXT:    v_readfirstlane_b32 s34, v0
-; GFX678-NEXT:    s_lshl_b32 s34, s34, 2
+; GFX678-NEXT:    s_cselect_b32 s34, 4, 0
 ; GFX678-NEXT:    s_add_i32 s35, s34, -4
 ; GFX678-NEXT:    s_min_u32 s34, s34, s35
 ; GFX678-NEXT:    s_lshl_b32 s36, s34, 2
@@ -1547,10 +1506,7 @@ define amdgpu_gfx void @s_set_rounding_select_4_0(i32 inreg %cond) {
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    s_cmp_eq_u32 s4, 0
-; GFX9-NEXT:    s_cselect_b64 s[34:35], -1, 0
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[34:35]
-; GFX9-NEXT:    v_readfirstlane_b32 s34, v0
-; GFX9-NEXT:    s_lshl_b32 s34, s34, 2
+; GFX9-NEXT:    s_cselect_b32 s34, 4, 0
 ; GFX9-NEXT:    s_add_i32 s35, s34, -4
 ; GFX9-NEXT:    s_min_u32 s34, s34, s35
 ; GFX9-NEXT:    s_lshl_b32 s36, s34, 2
@@ -1564,10 +1520,7 @@ define amdgpu_gfx void @s_set_rounding_select_4_0(i32 inreg %cond) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_cmp_eq_u32 s4, 0
-; GFX10-NEXT:    s_cselect_b32 s34, -1, 0
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s34
-; GFX10-NEXT:    v_readfirstlane_b32 s34, v0
-; GFX10-NEXT:    s_lshl_b32 s34, s34, 2
+; GFX10-NEXT:    s_cselect_b32 s34, 4, 0
 ; GFX10-NEXT:    s_add_i32 s35, s34, -4
 ; GFX10-NEXT:    s_min_u32 s36, s34, s35
 ; GFX10-NEXT:    s_mov_b32 s34, 0x1c84a50f
@@ -1581,10 +1534,7 @@ define amdgpu_gfx void @s_set_rounding_select_4_0(i32 inreg %cond) {
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    s_cmp_eq_u32 s4, 0
-; GFX11-NEXT:    s_cselect_b32 s0, -1, 0
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
-; GFX11-NEXT:    s_lshl_b32 s0, s0, 2
+; GFX11-NEXT:    s_cselect_b32 s0, 4, 0
 ; GFX11-NEXT:    s_add_i32 s1, s0, -4
 ; GFX11-NEXT:    s_min_u32 s2, s0, s1
 ; GFX11-NEXT:    s_mov_b32 s0, 0x1c84a50f
diff --git a/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll b/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll
index e7b405d7d9270..24a4d8fbde200 100644
--- a/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll
@@ -519,8 +519,8 @@ define amdgpu_kernel void @alloca_promote_atomicrmw_private_lds_promote(ptr addr
 ; GCN-NEXT:    s_mov_b32 s2, -1
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_cmp_eq_u32 s6, 1
-; GCN-NEXT:    s_cselect_b64 s[4:5], -1, 0
-; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GCN-NEXT:    s_cselect_b32 s4, 1, 0
+; GCN-NEXT:    v_mov_b32_e32 v0, s4
 ; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GCN-NEXT:    s_endpgm
 entry:
@@ -561,8 +561,8 @@ define amdgpu_kernel void @alloca_promote_cmpxchg_private(ptr addrspace(1) %out,
 ; GCN-NEXT:    s_mov_b32 s2, -1
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_cmp_eq_u32 s6, 1
-; GCN-NEXT:    s_cselect_b64 s[4:5], -1, 0
-; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GCN-NEXT:    s_cselect_b32 s4, 1, 0
+; GCN-NEXT:    v_mov_b32_e32 v0, s4
 ; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GCN-NEXT:    s_endpgm
 entry:
diff --git a/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll b/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll
index 1e6106896e0a4..e5e3ba6cdcaf0 100644
--- a/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll
+++ b/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll
@@ -59,13 +59,9 @@ define amdgpu_cs float @v_s_log_f32(float inreg %src) {
 ; GFX12-SDAG-LABEL: v_s_log_f32:
 ; GFX12-SDAG:       ; %bb.0:
 ; GFX12-SDAG-NEXT:    s_cmp_lt_f32 s0, 0x800000
-; GFX12-SDAG-NEXT:    s_cselect_b32 s1, -1, 0
-; GFX12-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s1
-; GFX12-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
-; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX12-SDAG-NEXT:    v_ldexp_f32 v0, s0, v0
-; GFX12-SDAG-NEXT:    s_and_b32 s0, s1, exec_lo
+; GFX12-SDAG-NEXT:    s_cselect_b32 s1, 32, 0
+; GFX12-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX12-SDAG-NEXT:    v_ldexp_f32 v0, s0, s1
 ; GFX12-SDAG-NEXT:    s_cselect_b32 s0, 0x42000000, 0
 ; GFX12-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
@@ -305,12 +301,8 @@ define amdgpu_cs float @srcmods_abs_f32(float inreg %src) {
 ; GFX12-SDAG-NEXT:    s_and_b32 s1, s0, 0x7fffffff
 ; GFX12-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
 ; GFX12-SDAG-NEXT:    s_cmp_lt_f32 s1, 0x800000
-; GFX12-SDAG-NEXT:    s_cselect_b32 s1, -1, 0
-; GFX12-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s1
-; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
-; GFX12-SDAG-NEXT:    v_ldexp_f32 v0, |s0|, v0
-; GFX12-SDAG-NEXT:    s_and_b32 s0, s1, exec_lo
+; GFX12-SDAG-NEXT:    s_cselect_b32 s1, 32, 0
+; GFX12-SDAG-NEXT:    v_ldexp_f32 v0, |s0|, s1
 ; GFX12-SDAG-NEXT:    s_cselect_b32 s0, 0x42000000, 0
 ; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
 ; GFX12-SDAG-NEXT:    v_log_f32_e32 v0, v0
@@ -342,13 +334,9 @@ define amdgpu_cs float @srcmods_neg_f32(float inreg %src) {
 ; GFX12-SDAG-LABEL: srcmods_neg_f32:
 ; GFX12-SDAG:       ; %bb.0:
 ; GFX12-SDAG-NEXT:    s_cmp_gt_f32 s0, 0x80800000
-; GFX12-SDAG-NEXT:    s_cselect_b32 s1, -1, 0
-; GFX12-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s1
-; GFX12-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
-; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX12-SDAG-NEXT:    v_ldexp_f32 v0, -s0, v0
-; GFX12-SDAG-NEXT:    s_and_b32 s0, s1, exec_lo
+; GFX12-SDAG-NEXT:    s_cselect_b32 s1, 32, 0
+; GFX12-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX12-SDAG-NEXT:    v_ldexp_f32 v0, -s0, s1
 ; GFX12-SDAG-NEXT:    s_cselect_b32 s0, 0x42000000, 0
 ; GFX12-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
diff --git a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
index 554e3640221b9..b78cbb0ac29cf 100644
--- a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
@@ -19,12 +19,12 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
 ; SI-SDAG-NEXT:    v_bfrev_b32_e32 v1, 8
 ; SI-SDAG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0x260
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
+; SI-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; SI-SDAG-NEXT:    s_cselect_b32 s2, 0x100, 0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v0, s2
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
-; SI-SDAG-NEXT:    s_and_b64 s[0:1], vcc, exec
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-SDAG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
+; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; SI-SDAG-NEXT:    s_mov_b32 s2, 0x3ff00000
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
@@ -106,10 +106,10 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; VI-SDAG-NEXT:    v_bfrev_b32_e32 v1, 8
 ; VI-SDAG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
+; VI-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; VI-SDAG-NEXT:    s_cselect_b32 s2, 0x100, 0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v0, s2
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
-; VI-SDAG-NEXT:    s_and_b64 s[0:1], vcc, exec
 ; VI-SDAG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
@@ -199,12 +199,12 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
 ; SI-SDAG-NEXT:    v_bfrev_b32_e32 v1, 8
 ; SI-SDAG-NEXT:    v_cmp_lt_f64_e64 s[2:3], |s[0:1]|, v[0:1]
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0x260
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[2:3]
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
+; SI-SDAG-NEXT:    s_and_b64 s[2:3], s[2:3], exec
+; SI-SDAG-NEXT:    s_cselect_b32 s2, 0x100, 0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v0, s2
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], |s[0:1]|, v0
-; SI-SDAG-NEXT:    s_and_b64 s[0:1], s[2:3], exec
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-SDAG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
+; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; SI-SDAG-NEXT:    s_mov_b32 s2, 0x3ff00000
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
@@ -286,10 +286,10 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; VI-SDAG-NEXT:    v_bfrev_b32_e32 v1, 8
 ; VI-SDAG-NEXT:    v_cmp_lt_f64_e64 s[2:3], |s[0:1]|, v[0:1]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[2:3]
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
+; VI-SDAG-NEXT:    s_and_b64 s[2:3], s[2:3], exec
+; VI-SDAG-NEXT:    s_cselect_b32 s2, 0x100, 0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v0, s2
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], |s[0:1]|, v0
-; VI-SDAG-NEXT:    s_and_b64 s[0:1], s[2:3], exec
 ; VI-SDAG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
@@ -380,12 +380,12 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_f64(double inreg %x) {
 ; SI-SDAG-NEXT:    v_bfrev_b32_e32 v1, 8
 ; SI-SDAG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0x260
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
+; SI-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; SI-SDAG-NEXT:    s_cselect_b32 s2, 0x100, 0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v0, s2
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
-; SI-SDAG-NEXT:    s_and_b64 s[0:1], vcc, exec
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-SDAG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
+; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; SI-SDAG-NEXT:    s_mov_b32 s2, 0xbff00000
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
@@ -467,10 +467,10 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_f64(double inreg %x) {
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; VI-SDAG-NEXT:    v_bfrev_b32_e32 v1, 8
 ; VI-SDAG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
+; VI-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; VI-SDAG-NEXT:    s_cselect_b32 s2, 0x100, 0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v0, s2
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
-; VI-SDAG-NEXT:    s_and_b64 s[0:1], vcc, exec
 ; VI-SDAG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
@@ -560,12 +560,12 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_neg_f64(double inreg %x) {
 ; SI-SDAG-NEXT:    v_bfrev_b32_e32 v1, 9
 ; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[0:1], v[0:1]
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0x260
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
+; SI-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; SI-SDAG-NEXT:    s_cselect_b32 s2, 0x100, 0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v0, s2
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], -s[0:1], v0
-; SI-SDAG-NEXT:    s_and_b64 s[0:1], vcc, exec
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-SDAG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
+; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; SI-SDAG-NEXT:    s_mov_b32 s2, 0xbff00000
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
@@ -647,10 +647,10 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_neg_f64(double inreg %x) {
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; VI-SDAG-NEXT:    v_bfrev_b32_e32 v1, 9
 ; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[0:1], v[0:1]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
+; VI-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; VI-SDAG-NEXT:    s_cselect_b32 s2, 0x100, 0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v0, s2
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], -s[0:1], v0
-; VI-SDAG-NEXT:    s_and_b64 s[0:1], vcc, exec
 ; VI-SDAG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
@@ -741,12 +741,12 @@ define double @v_rsq_f64(double %x) {
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3ff00000
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
@@ -827,8 +827,8 @@ define double @v_rsq_f64(double %x) {
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
@@ -910,12 +910,12 @@ define double @v_rsq_f64_fabs(double %x) {
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; SI-SDAG-NEXT:    v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], |v[0:1]|, v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3ff00000
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
@@ -996,8 +996,8 @@ define double @v_rsq_f64_fabs(double %x) {
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; VI-SDAG-NEXT:    v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], |v[0:1]|, v2
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
@@ -1080,12 +1080,12 @@ define double @v_rsq_f64_missing_contract0(double %x) {
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3ff00000
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
@@ -1166,8 +1166,8 @@ define double @v_rsq_f64_missing_contract0(double %x) {
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
@@ -1249,12 +1249,12 @@ define double @v_rsq_f64_missing_contract1(double %x) {
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3ff00000
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
@@ -1335,8 +1335,8 @@ define double @v_rsq_f64_missing_contract1(double %x) {
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
@@ -1418,12 +1418,12 @@ define double @v_neg_rsq_f64(double %x) {
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SI-SDAG-NEXT:    s_mov_b32 s6, 0xbff00000
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
@@ -1504,8 +1504,8 @@ define double @v_neg_rsq_f64(double %x) {
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
@@ -1588,23 +1588,22 @@ define <2 x double> @v_rsq_v2f64(<2 x double> %x) {
 ; SI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
 ; SI-SDAG-NEXT:    v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 8, v6
+; SI-SDAG-NEXT:    v_mov_b32_e32 v6, 0x100
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v6, vcc
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v6, 0, v6, s[4:5]
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3ff00000
+; SI-SDAG-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
 ; SI-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3ff00000
 ; SI-SDAG-NEXT:    v_mul_f64 v[10:11], v[0:1], v[6:7]
 ; SI-SDAG-NEXT:    v_mul_f64 v[6:7], v[6:7], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
 ; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; SI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[10:11], v[0:1]
+; SI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
 ; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; SI-SDAG-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[10:11], v[0:1]
 ; SI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
 ; SI-SDAG-NEXT:    v_fma_f64 v[10:11], v[18:19], v[6:7], v[10:11]
 ; SI-SDAG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
@@ -1743,45 +1742,44 @@ define <2 x double> @v_rsq_v2f64(<2 x double> %x) {
 ; VI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
 ; VI-SDAG-NEXT:    v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
-; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s[4:5]
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
+; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x100
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v4, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[4:5]
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v4
-; VI-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v5
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
-; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
+; VI-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
 ; VI-SDAG-NEXT:    v_mul_f64 v[10:11], v[0:1], v[6:7]
 ; VI-SDAG-NEXT:    v_mul_f64 v[6:7], v[6:7], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
+; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[2:3], v[4:5]
+; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
 ; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
+; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
 ; VI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
 ; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
-; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
 ; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
-; VI-SDAG-NEXT:    v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
 ; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
 ; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
+; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
 ; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
 ; VI-SDAG-NEXT:    v_cndmask_b32_e32 v10, 0, v8, vcc
 ; VI-SDAG-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[4:5]
 ; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; VI-SDAG-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v8
 ; VI-SDAG-NEXT:    v_cmp_class_f64_e64 s[4:5], v[2:3], v9
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v10
-; VI-SDAG-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v8
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, v4, v2, s[4:5]
 ; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
 ; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
 ; VI-SDAG-NEXT:    v_div_scale_f64 v[5:6], s[6:7], v[0:1], v[0:1], 1.0
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, v4, v2, s[4:5]
 ; VI-SDAG-NEXT:    v_div_scale_f64 v[7:8], s[4:5], v[2:3], v[2:3], 1.0
 ; VI-SDAG-NEXT:    v_div_scale_f64 v[17:18], s[4:5], 1.0, v[2:3], 1.0
 ; VI-SDAG-NEXT:    v_rcp_f64_e32 v[9:10], v[5:6]
@@ -1890,23 +1888,22 @@ define <2 x double> @v_neg_rsq_v2f64(<2 x double> %x) {
 ; SI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
 ; SI-SDAG-NEXT:    v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 8, v6
+; SI-SDAG-NEXT:    v_mov_b32_e32 v6, 0x100
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v6, vcc
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v6, 0, v6, s[4:5]
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT:    s_mov_b32 s6, 0xbff00000
+; SI-SDAG-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
 ; SI-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT:    s_mov_b32 s6, 0xbff00000
 ; SI-SDAG-NEXT:    v_mul_f64 v[10:11], v[0:1], v[6:7]
 ; SI-SDAG-NEXT:    v_mul_f64 v[6:7], v[6:7], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
 ; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; SI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[10:11], v[0:1]
+; SI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
 ; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; SI-SDAG-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[10:11], v[0:1]
 ; SI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
 ; SI-SDAG-NEXT:    v_fma_f64 v[10:11], v[18:19], v[6:7], v[10:11]
 ; SI-SDAG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
@@ -2045,45 +2042,44 @@ define <2 x double> @v_neg_rsq_v2f64(<2 x double> %x) {
 ; VI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
 ; VI-SDAG-NEXT:    v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
-; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s[4:5]
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
+; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x100
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v4, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[4:5]
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v4
-; VI-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v5
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
-; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
+; VI-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
 ; VI-SDAG-NEXT:    v_mul_f64 v[10:11], v[0:1], v[6:7]
 ; VI-SDAG-NEXT:    v_mul_f64 v[6:7], v[6:7], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
+; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[2:3], v[4:5]
+; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
 ; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
+; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
 ; VI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
 ; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
-; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
 ; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
-; VI-SDAG-NEXT:    v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
 ; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
 ; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
+; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
 ; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
 ; VI-SDAG-NEXT:    v_cndmask_b32_e32 v10, 0, v8, vcc
 ; VI-SDAG-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[4:5]
 ; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; VI-SDAG-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v8
 ; VI-SDAG-NEXT:    v_cmp_class_f64_e64 s[4:5], v[2:3], v9
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v10
-; VI-SDAG-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v8
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, v4, v2, s[4:5]
 ; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
 ; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
 ; VI-SDAG-NEXT:    v_div_scale_f64 v[5:6], s[6:7], v[0:1], v[0:1], -1.0
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, v4, v2, s[4:5]
 ; VI-SDAG-NEXT:    v_div_scale_f64 v[7:8], s[4:5], v[2:3], v[2:3], -1.0
 ; VI-SDAG-NEXT:    v_div_scale_f64 v[17:18], s[4:5], -1.0, v[2:3], -1.0
 ; VI-SDAG-NEXT:    v_rcp_f64_e32 v[9:10], v[5:6]
@@ -2191,12 +2187,12 @@ define <2 x double> @v_neg_rsq_v2f64_poisonelt(<2 x double> %x) {
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SI-SDAG-NEXT:    s_mov_b32 s6, 0xbff00000
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
@@ -2315,8 +2311,8 @@ define <2 x double> @v_neg_rsq_v2f64_poisonelt(<2 x double> %x) {
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
@@ -2434,23 +2430,22 @@ define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) {
 ; SI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
 ; SI-SDAG-NEXT:    v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 8, v6
+; SI-SDAG-NEXT:    v_mov_b32_e32 v6, 0x100
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v6, vcc
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v6, 0, v6, s[4:5]
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v6
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT:    s_mov_b32 s6, 0xbff00000
+; SI-SDAG-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
 ; SI-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
+; SI-SDAG-NEXT:    s_mov_b32 s6, 0xbff00000
 ; SI-SDAG-NEXT:    v_mul_f64 v[10:11], v[0:1], v[6:7]
 ; SI-SDAG-NEXT:    v_mul_f64 v[6:7], v[6:7], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
 ; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[2:3], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
+; SI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
-; SI-SDAG-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[10:11], v[0:1]
+; SI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
 ; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
+; SI-SDAG-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[10:11], v[0:1]
 ; SI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
 ; SI-SDAG-NEXT:    v_fma_f64 v[10:11], v[18:19], v[6:7], v[10:11]
 ; SI-SDAG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
@@ -2592,45 +2587,44 @@ define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) {
 ; VI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
 ; VI-SDAG-NEXT:    v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
-; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s[4:5]
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
+; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x100
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v4, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[4:5]
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v4
-; VI-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
+; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v5
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
-; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[2:3], v[4:5]
-; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
+; VI-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
 ; VI-SDAG-NEXT:    v_mul_f64 v[10:11], v[0:1], v[6:7]
 ; VI-SDAG-NEXT:    v_mul_f64 v[6:7], v[6:7], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
+; VI-SDAG-NEXT:    v_mul_f64 v[8:9], v[2:3], v[4:5]
+; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
 ; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
-; VI-SDAG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
-; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
+; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
 ; VI-SDAG-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
 ; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
-; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
+; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
 ; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
-; VI-SDAG-NEXT:    v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
-; VI-SDAG-NEXT:    v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
 ; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
+; VI-SDAG-NEXT:    v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
 ; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
+; VI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[8:9], v[2:3]
+; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
 ; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
-; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
 ; VI-SDAG-NEXT:    v_cndmask_b32_e32 v10, 0, v8, vcc
 ; VI-SDAG-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[4:5]
 ; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
+; VI-SDAG-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v8
 ; VI-SDAG-NEXT:    v_cmp_class_f64_e64 s[4:5], v[2:3], v9
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v10
-; VI-SDAG-NEXT:    v_ldexp_f64 v[6:7], v[6:7], v8
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, v4, v2, s[4:5]
 ; VI-SDAG-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
 ; VI-SDAG-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
 ; VI-SDAG-NEXT:    v_div_scale_f64 v[5:6], s[6:7], v[0:1], v[0:1], -1.0
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, v4, v2, s[4:5]
 ; VI-SDAG-NEXT:    v_div_scale_f64 v[7:8], s[4:5], v[2:3], v[2:3], 1.0
 ; VI-SDAG-NEXT:    v_div_scale_f64 v[17:18], s[4:5], 1.0, v[2:3], 1.0
 ; VI-SDAG-NEXT:    v_rcp_f64_e32 v[9:10], v[5:6]
@@ -2738,12 +2732,12 @@ define double @v_rsq_f64_fneg_fabs(double %x) {
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-NEXT:    s_brev_b32 s5, 9
 ; SI-SDAG-NEXT:    v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], -|v[0:1]|, v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3ff00000
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
@@ -2824,8 +2818,8 @@ define double @v_rsq_f64_fneg_fabs(double %x) {
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-NEXT:    s_brev_b32 s5, 9
 ; VI-SDAG-NEXT:    v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], -|v[0:1]|, v2
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
@@ -2909,12 +2903,12 @@ define double @v_rsq_f64__afn_sqrt(double %x) {
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3ff00000
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
@@ -2995,8 +2989,8 @@ define double @v_rsq_f64__afn_sqrt(double %x) {
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
@@ -3078,12 +3072,12 @@ define double @v_rsq_f64__afn_fdiv(double %x) {
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
@@ -3148,8 +3142,8 @@ define double @v_rsq_f64__afn_fdiv(double %x) {
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
@@ -3223,12 +3217,12 @@ define double @v_rsq_f64__afn(double %x) {
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
@@ -3293,8 +3287,8 @@ define double @v_rsq_f64__afn(double %x) {
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
@@ -3368,12 +3362,12 @@ define double @v_neg_rsq_f64__afn(double %x) {
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
@@ -3439,8 +3433,8 @@ define double @v_neg_rsq_f64__afn(double %x) {
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
@@ -3515,12 +3509,12 @@ define double @v_rsq_f64__afn_ninf(double %x) {
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
@@ -3585,8 +3579,8 @@ define double @v_rsq_f64__afn_ninf(double %x) {
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
@@ -3660,12 +3654,12 @@ define double @v_rsq_f64__afn_nnan(double %x) {
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
@@ -3730,8 +3724,8 @@ define double @v_rsq_f64__afn_nnan(double %x) {
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
@@ -3805,12 +3799,12 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
@@ -3875,8 +3869,8 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
@@ -3950,12 +3944,12 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
@@ -4021,8 +4015,8 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
@@ -4097,12 +4091,12 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SI-SDAG-NEXT:    s_mov_b32 s6, 0x3ff00000
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
@@ -4183,8 +4177,8 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
@@ -4266,40 +4260,39 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
-; SI-SDAG-NEXT:    v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
+; SI-SDAG-NEXT:    v_mov_b32_e32 v12, 0x100
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v12, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT:    v_mov_b32_e32 v14, 0xffffff80
+; SI-SDAG-NEXT:    v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
 ; SI-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v12, 0, v14, vcc
+; SI-SDAG-NEXT:    v_mov_b32_e32 v14, 0xffffff80
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v15, 0x260
-; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[2:3], v15
 ; SI-SDAG-NEXT:    v_mul_f64 v[6:7], v[2:3], v[4:5]
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
 ; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 0.5
 ; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
 ; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v8, 8, v8
+; SI-SDAG-NEXT:    v_cndmask_b32_e64 v8, 0, v12, s[4:5]
 ; SI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[6:7], v[2:3]
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v8
 ; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[10:11], v[4:5], v[6:7]
 ; SI-SDAG-NEXT:    v_rsq_f64_e32 v[8:9], v[0:1]
 ; SI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[6:7], v[2:3]
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v12, 0, v14, vcc
 ; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[10:11], v[4:5], v[6:7]
 ; SI-SDAG-NEXT:    v_mul_f64 v[6:7], v[0:1], v[8:9]
 ; SI-SDAG-NEXT:    v_mul_f64 v[8:9], v[8:9], 0.5
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v12
 ; SI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[6:7], 0.5
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[2:3], v15
 ; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
 ; SI-SDAG-NEXT:    v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
 ; SI-SDAG-NEXT:    v_fma_f64 v[12:13], -v[6:7], v[6:7], v[0:1]
-; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
 ; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[12:13], v[8:9], v[6:7]
-; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v15
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; SI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[6:7], v[0:1]
+; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v15
 ; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[10:11], v[8:9], v[6:7]
 ; SI-SDAG-NEXT:    v_cndmask_b32_e64 v6, 0, v14, s[4:5]
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v6
@@ -4391,11 +4384,10 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
 ; VI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
 ; VI-SDAG-NEXT:    v_cmp_gt_f64_e64 s[4:5], s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
-; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s[4:5]
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
+; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x100
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v4, vcc
+; VI-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[4:5]
+; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v5
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v4
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[6:7], v[0:1]
@@ -4417,8 +4409,8 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
 ; VI-SDAG-NEXT:    v_fma_f64 v[14:15], -v[10:11], v[10:11], v[0:1]
 ; VI-SDAG-NEXT:    v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; VI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; VI-SDAG-NEXT:    v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
+; VI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; VI-SDAG-NEXT:    v_cndmask_b32_e32 v10, 0, v8, vcc
 ; VI-SDAG-NEXT:    v_cndmask_b32_e64 v8, 0, v8, s[4:5]
 ; VI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v9
@@ -4519,12 +4511,12 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) #0 {
 ; SI-SDAG-NEXT:    v_bfrev_b32_e32 v1, 8
 ; SI-SDAG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0x260
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
+; SI-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; SI-SDAG-NEXT:    s_cselect_b32 s2, 0x100, 0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v0, s2
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
-; SI-SDAG-NEXT:    s_and_b64 s[0:1], vcc, exec
-; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-SDAG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
+; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
@@ -4590,10 +4582,10 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) #0 {
 ; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; VI-SDAG-NEXT:    v_bfrev_b32_e32 v1, 8
 ; VI-SDAG-NEXT:    v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
+; VI-SDAG-NEXT:    s_and_b64 s[2:3], vcc, exec
+; VI-SDAG-NEXT:    s_cselect_b32 s2, 0x100, 0
+; VI-SDAG-NEXT:    v_mov_b32_e32 v0, s2
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], s[0:1], v0
-; VI-SDAG-NEXT:    s_and_b64 s[0:1], vcc, exec
 ; VI-SDAG-NEXT:    s_cselect_b32 s0, 0xffffff80, 0
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
@@ -4675,12 +4667,12 @@ define double @v_rsq_f64_unsafe(double %x) #0 {
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
 ; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
@@ -4745,8 +4737,8 @@ define double @v_rsq_f64_unsafe(double %x) #0 {
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
@@ -5074,24 +5066,24 @@ define double @v_div_contract_sqrt_f64(double %x, double %y) {
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v10, 0xffffff80
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
+; SI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x100
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT:    v_mov_b32_e32 v11, 0x260
 ; SI-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
 ; SI-SDAG-NEXT:    v_mul_f64 v[6:7], v[2:3], v[4:5]
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
 ; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 0.5
 ; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
 ; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[6:7], v[6:7], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[8:9], v[4:5], v[6:7]
+; SI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[6:7], v[2:3]
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[10:11], v[4:5], v[6:7]
+; SI-SDAG-NEXT:    v_mov_b32_e32 v10, 0xffffff80
 ; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[6:7], v[6:7], v[2:3]
 ; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[6:7]
 ; SI-SDAG-NEXT:    v_cndmask_b32_e32 v6, 0, v10, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v6
-; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[2:3], v11
+; SI-SDAG-NEXT:    v_mov_b32_e32 v6, 0x260
+; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[2:3], v6
 ; SI-SDAG-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
 ; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; SI-SDAG-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
@@ -5158,8 +5150,8 @@ define double @v_div_contract_sqrt_f64(double %x, double %y) {
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
+; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x100
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
 ; VI-SDAG-NEXT:    v_mul_f64 v[6:7], v[2:3], v[4:5]
@@ -5241,24 +5233,24 @@ define double @v_div_arcp_sqrt_f64(double %x, double %y) {
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v10, 0xffffff80
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
+; SI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x100
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT:    v_mov_b32_e32 v11, 0x260
 ; SI-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
 ; SI-SDAG-NEXT:    v_mul_f64 v[6:7], v[2:3], v[4:5]
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
 ; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 0.5
 ; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
 ; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[6:7], v[6:7], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[8:9], v[4:5], v[6:7]
+; SI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[6:7], v[2:3]
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[10:11], v[4:5], v[6:7]
+; SI-SDAG-NEXT:    v_mov_b32_e32 v10, 0xffffff80
 ; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[6:7], v[6:7], v[2:3]
 ; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[6:7]
 ; SI-SDAG-NEXT:    v_cndmask_b32_e32 v6, 0, v10, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v6
-; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[2:3], v11
+; SI-SDAG-NEXT:    v_mov_b32_e32 v6, 0x260
+; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[2:3], v6
 ; SI-SDAG-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
 ; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; SI-SDAG-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
@@ -5325,8 +5317,8 @@ define double @v_div_arcp_sqrt_f64(double %x, double %y) {
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
+; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x100
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
 ; VI-SDAG-NEXT:    v_mul_f64 v[6:7], v[2:3], v[4:5]
@@ -5408,24 +5400,24 @@ define double @v_div_contract_arcp_sqrt_f64(double %x, double %y) {
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v10, 0xffffff80
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
+; SI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x100
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
-; SI-SDAG-NEXT:    v_mov_b32_e32 v11, 0x260
 ; SI-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
 ; SI-SDAG-NEXT:    v_mul_f64 v[6:7], v[2:3], v[4:5]
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[4:5], 0.5
 ; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 0.5
 ; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
 ; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
-; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[6:7], v[6:7], v[2:3]
-; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[8:9], v[4:5], v[6:7]
+; SI-SDAG-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[6:7], v[2:3]
+; SI-SDAG-NEXT:    v_fma_f64 v[6:7], v[10:11], v[4:5], v[6:7]
+; SI-SDAG-NEXT:    v_mov_b32_e32 v10, 0xffffff80
 ; SI-SDAG-NEXT:    v_fma_f64 v[8:9], -v[6:7], v[6:7], v[2:3]
 ; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[6:7]
 ; SI-SDAG-NEXT:    v_cndmask_b32_e32 v6, 0, v10, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[4:5], v[4:5], v6
-; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[2:3], v11
+; SI-SDAG-NEXT:    v_mov_b32_e32 v6, 0x260
+; SI-SDAG-NEXT:    v_cmp_class_f64_e32 vcc, v[2:3], v6
 ; SI-SDAG-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
 ; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; SI-SDAG-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
@@ -5492,8 +5484,8 @@ define double @v_div_contract_arcp_sqrt_f64(double %x, double %y) {
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[2:3]
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v4, 8, v4
+; VI-SDAG-NEXT:    v_mov_b32_e32 v4, 0x100
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v4
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[4:5], v[2:3]
 ; VI-SDAG-NEXT:    v_mul_f64 v[6:7], v[2:3], v[4:5]
@@ -5575,17 +5567,17 @@ define double @v_div_const_contract_sqrt_f64(double %x) {
 ; SI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; SI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; SI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
-; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
-; SI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
+; SI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; SI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
-; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
+; SI-SDAG-NEXT:    v_mov_b32_e32 v8, 0xffffff80
 ; SI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
+; SI-SDAG-NEXT:    v_mov_b32_e32 v9, 0x260
 ; SI-SDAG-NEXT:    s_mov_b32 s6, 0
 ; SI-SDAG-NEXT:    s_mov_b32 s7, 0x40700000
-; SI-SDAG-NEXT:    s_mov_b32 s8, 0x40700000
 ; SI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
 ; SI-SDAG-NEXT:    v_mul_f64 v[2:3], v[2:3], 0.5
+; SI-SDAG-NEXT:    s_mov_b32 s8, 0x40700000
 ; SI-SDAG-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
 ; SI-SDAG-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
 ; SI-SDAG-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
@@ -5665,10 +5657,10 @@ define double @v_div_const_contract_sqrt_f64(double %x) {
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-NEXT:    s_brev_b32 s5, 8
 ; VI-SDAG-NEXT:    v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; VI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x100
 ; VI-SDAG-NEXT:    s_mov_b32 s4, 0
 ; VI-SDAG-NEXT:    s_mov_b32 s5, 0x40700000
-; VI-SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
+; VI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; VI-SDAG-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v2
 ; VI-SDAG-NEXT:    v_rsq_f64_e32 v[2:3], v[0:1]
 ; VI-SDAG-NEXT:    v_mul_f64 v[4:5], v[0:1], v[2:3]
diff --git a/llvm/test/CodeGen/AMDGPU/vector-alloca-bitcast.ll b/llvm/test/CodeGen/AMDGPU/vector-alloca-bitcast.ll
index 0af8c95da8d8b..0b277f88e513e 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-alloca-bitcast.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-alloca-bitcast.ll
@@ -15,14 +15,11 @@ target datalayout = "A5"
 ; GCN-ALLOCA:         buffer_load_dword
 
 ; GCN-PROMOTE: s_cmp_eq_u32 s{{[0-9]+}}, 1
-; GCN-PROMOTE: s_cselect_b64 [[CC1:[^,]+]], -1, 0
+; GCN-PROMOTE: s_cselect_b32 [[IND1:s[0-9]+]], 1, 0
 ; GCN-PROMOTE: s_cmp_lg_u32 s{{[0-9]+}}, 2
-; GCN-PROMOTE: v_cndmask_b32_e{{32|64}} [[IND1:v[0-9]+]], 0, 1, [[CC1]]
-; GCN-PROMOTE: s_cselect_b64 vcc, -1, 0
+; GCN-PROMOTE: s_cselect_b32 [[IND2:s[0-9]+]], [[IND1]], 2
 ; GCN-PROMOTE: s_cmp_lg_u32 s{{[0-9]+}}, 3
-; GCN-PROMOTE: v_cndmask_b32_e{{32|64}} [[IND2:v[0-9]+]], 2, [[IND1]], vcc
-; GCN-PROMOTE: s_cselect_b64 vcc, -1, 0
-; GCN-PROMOTE: v_cndmask_b32_e{{32|64}} [[IND3:v[0-9]+]], 3, [[IND2]], vcc
+; GCN-PROMOTE: s_cselect_b32 [[IND3:s[0-9]+]], [[IND2]], 3
 ; GCN-PROMOTE: ScratchSize: 0
 
 define amdgpu_kernel void @vector_read_alloca_bitcast(ptr addrspace(1) %out, i32 %index) {
@@ -51,7 +48,7 @@ entry:
 ; GCN-ALLOCA-COUNT-5: buffer_store_dword
 ; GCN-ALLOCA:         buffer_load_dword
 
-; GCN-PROMOTE-COUNT-7: v_cndmask
+; GCN-PROMOTE-COUNT-7: s_cselect_b32
 
 ; GCN-PROMOTE: ScratchSize: 0
 
@@ -292,14 +289,11 @@ entry:
 ; GCN-ALLOCA:         buffer_load_dword
 
 ; GCN-PROMOTE: s_cmp_eq_u32 s{{[0-9]+}}, 1
-; GCN-PROMOTE: s_cselect_b64 [[CC1:[^,]+]], -1, 0
+; GCN-PROMOTE: s_cselect_b32 [[IND1:s[0-9]+]], 1, 0
 ; GCN-PROMOTE: s_cmp_lg_u32 s{{[0-9]+}}, 2
-; GCN-PROMOTE: v_cndmask_b32_e{{32|64}} [[IND1:v[0-9]+]], 0, 1, [[CC1]]
-; GCN-PROMOTE: s_cselect_b64 vcc, -1, 0
+; GCN-PROMOTE: s_cselect_b32 [[IND2:s[0-9]+]], [[IND1]], 2
 ; GCN-PROMOTE: s_cmp_lg_u32 s{{[0-9]+}}, 3
-; GCN-PROMOTE: v_cndmask_b32_e{{32|64}} [[IND2:v[0-9]+]], 2, [[IND1]], vcc
-; GCN-PROMOTE: s_cselect_b64 vcc, -1, 0
-; GCN-PROMOTE: v_cndmask_b32_e{{32|64}} [[IND3:v[0-9]+]], 3, [[IND2]], vcc
+; GCN-PROMOTE: s_cselect_b32 [[IND3:s[0-9]+]], [[IND2]], 3
 
 ; GCN-PROMOTE: ScratchSize: 0
 
diff --git a/llvm/test/CodeGen/ARM/select-imm.ll b/llvm/test/CodeGen/ARM/select-imm.ll
index 6427a3e34cf8e..186276b50ceeb 100644
--- a/llvm/test/CodeGen/ARM/select-imm.ll
+++ b/llvm/test/CodeGen/ARM/select-imm.ll
@@ -295,15 +295,13 @@ define i32 @t7(i32 %a, i32 %b) nounwind readnone {
 ; ARM-LABEL: t7:
 ; ARM:       @ %bb.0: @ %entry
 ; ARM-NEXT:    subs r0, r0, r1
-; ARM-NEXT:    movne r0, #1
-; ARM-NEXT:    lsl r0, r0, #2
+; ARM-NEXT:    movne r0, #4
 ; ARM-NEXT:    mov pc, lr
 ;
 ; ARMT2-LABEL: t7:
 ; ARMT2:       @ %bb.0: @ %entry
 ; ARMT2-NEXT:    subs r0, r0, r1
-; ARMT2-NEXT:    movwne r0, #1
-; ARMT2-NEXT:    lsl r0, r0, #2
+; ARMT2-NEXT:    movwne r0, #4
 ; ARMT2-NEXT:    bx lr
 ;
 ; THUMB1-LABEL: t7:
@@ -318,8 +316,7 @@ define i32 @t7(i32 %a, i32 %b) nounwind readnone {
 ; THUMB2:       @ %bb.0: @ %entry
 ; THUMB2-NEXT:    subs r0, r0, r1
 ; THUMB2-NEXT:    it ne
-; THUMB2-NEXT:    movne r0, #1
-; THUMB2-NEXT:    lsls r0, r0, #2
+; THUMB2-NEXT:    movne r0, #4
 ; THUMB2-NEXT:    bx lr
 ;
 ; V8MBASE-LABEL: t7:
@@ -824,15 +821,13 @@ define i32 @t12(i32 %a) nounwind {
 ; ARM-LABEL: t12:
 ; ARM:       @ %bb.0: @ %entry
 ; ARM-NEXT:    cmp r0, #0
-; ARM-NEXT:    movne r0, #1
-; ARM-NEXT:    lsl r0, r0, #1
+; ARM-NEXT:    movne r0, #2
 ; ARM-NEXT:    mov pc, lr
 ;
 ; ARMT2-LABEL: t12:
 ; ARMT2:       @ %bb.0: @ %entry
 ; ARMT2-NEXT:    cmp r0, #0
-; ARMT2-NEXT:    movwne r0, #1
-; ARMT2-NEXT:    lsl r0, r0, #1
+; ARMT2-NEXT:    movwne r0, #2
 ; ARMT2-NEXT:    bx lr
 ;
 ; THUMB1-LABEL: t12:
@@ -846,8 +841,7 @@ define i32 @t12(i32 %a) nounwind {
 ; THUMB2:       @ %bb.0: @ %entry
 ; THUMB2-NEXT:    cmp r0, #0
 ; THUMB2-NEXT:    it ne
-; THUMB2-NEXT:    movne r0, #1
-; THUMB2-NEXT:    lsls r0, r0, #1
+; THUMB2-NEXT:    movne r0, #2
 ; THUMB2-NEXT:    bx lr
 ;
 ; V8MBASE-LABEL: t12:
diff --git a/llvm/test/CodeGen/MSP430/shift-amount-threshold.ll b/llvm/test/CodeGen/MSP430/shift-amount-threshold.ll
index 8166c4688f3d3..1ffae4f982fc2 100644
--- a/llvm/test/CodeGen/MSP430/shift-amount-threshold.ll
+++ b/llvm/test/CodeGen/MSP430/shift-amount-threshold.ll
@@ -115,13 +115,12 @@ define i16 @testShiftAnd_1(i16 %x) {
 ; CHECK-LABEL: testShiftAnd_1:
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    mov r12, r13
-; CHECK-NEXT:    mov #1, r12
+; CHECK-NEXT:    mov #2, r12
 ; CHECK-NEXT:    tst r13
 ; CHECK-NEXT:    jl .LBB6_2
 ; CHECK-NEXT:  ; %bb.1: ; %entry
 ; CHECK-NEXT:    clr r12
 ; CHECK-NEXT:  .LBB6_2: ; %entry
-; CHECK-NEXT:    add r12, r12
 ; CHECK-NEXT:    ret
 entry:
   %cmp = icmp slt i16 %x, 0
diff --git a/llvm/test/CodeGen/Thumb/branchless-cmp.ll b/llvm/test/CodeGen/Thumb/branchless-cmp.ll
index 40c5b8853da9c..e5bfb87f00c92 100644
--- a/llvm/test/CodeGen/Thumb/branchless-cmp.ll
+++ b/llvm/test/CodeGen/Thumb/branchless-cmp.ll
@@ -1,101 +1,117 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=thumb-eabi -mcpu=cortex-m0 %s -verify-machineinstrs -o - | FileCheck %s
 
 define i32 @test1a(i32 %a, i32 %b) {
+; CHECK-LABEL: test1a:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    subs r0, r0, r1
+; CHECK-NEXT:    subs r1, r0, #1
+; CHECK-NEXT:    sbcs r0, r1
+; CHECK-NEXT:    bx lr
 entry:
   %cmp = icmp ne i32 %a, %b
   %cond = zext i1 %cmp to i32
   ret i32 %cond
-; CHECK-LABEL: test1a:
-; CHECK-NOT: b{{(ne)|(eq)}}
-; CHECK:       subs r0, r0, r1
-; CHECK-NEXT:  subs r1, r0, #1
-; CHECK-NEXT:  sbcs r0, r1
 }
 
 define i32 @test1b(i32 %a, i32 %b) {
+; CHECK-LABEL: test1b:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    subs r1, r0, r1
+; CHECK-NEXT:    rsbs r0, r1, #0
+; CHECK-NEXT:    adcs r0, r1
+; CHECK-NEXT:    bx lr
 entry:
   %cmp = icmp eq i32 %a, %b
   %cond = zext i1 %cmp to i32
   ret i32 %cond
-; CHECK-LABEL: test1b:
-; CHECK-NOT: b{{(ne)|(eq)}}
-; CHECK:       subs    r1, r0, r1
-; CHECK-NEXT:  rsbs    r0, r1, #0
-; CHECK-NEXT:  adcs    r0, r1
 }
 
 define i32 @test2a(i32 %a, i32 %b) {
+; CHECK-LABEL: test2a:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    subs r1, r0, r1
+; CHECK-NEXT:    rsbs r0, r1, #0
+; CHECK-NEXT:    adcs r0, r1
+; CHECK-NEXT:    bx lr
 entry:
   %cmp = icmp eq i32 %a, %b
   %cond = zext i1 %cmp to i32
   ret i32 %cond
-; CHECK-LABEL: test2a:
-; CHECK-NOT: b{{(ne)|(eq)}}
-; CHECK:       subs    r1, r0, r1
-; CHECK-NEXT:  rsbs    r0, r1, #0
-; CHECK-NEXT:  adcs    r0, r1
 }
 
 define i32 @test2b(i32 %a, i32 %b) {
+; CHECK-LABEL: test2b:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    subs r0, r0, r1
+; CHECK-NEXT:    subs r1, r0, #1
+; CHECK-NEXT:    sbcs r0, r1
+; CHECK-NEXT:    bx lr
 entry:
   %cmp = icmp ne i32 %a, %b
   %cond = zext i1 %cmp to i32
   ret i32 %cond
-; CHECK-LABEL: test2b:
-; CHECK-NOT: b{{(ne)|(eq)}}
-; CHECK:       subs    r0, r0, r1
-; CHECK-NEXT:  subs    r1, r0, #1
-; CHECK-NEXT:  sbcs    r0, r1
 }
 
 define i32 @test3a(i32 %a, i32 %b) {
+; CHECK-LABEL: test3a:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    subs r0, r0, r1
+; CHECK-NEXT:    beq .LBB4_2
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:    movs r0, #4
+; CHECK-NEXT:  .LBB4_2: @ %entry
+; CHECK-NEXT:    bx lr
 entry:
   %cmp = icmp eq i32 %a, %b
   %cond = select i1 %cmp, i32 0, i32 4
   ret i32 %cond
-; CHECK-LABEL: test3a:
-; CHECK-NOT: b{{(ne)|(eq)}}
-; CHECK:       subs    r0, r0, r1
-; CHECK-NEXT:  subs    r1, r0, #1
-; CHECK-NEXT:  sbcs    r0, r1
-; CHECK-NEXT:  lsls    r0, r0, #2
 }
 
 define i32 @test3b(i32 %a, i32 %b) {
+; CHECK-LABEL: test3b:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    cmp r0, r1
+; CHECK-NEXT:    beq .LBB5_2
+; CHECK-NEXT:  @ %bb.1: @ %entry
+; CHECK-NEXT:    movs r0, #0
+; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  .LBB5_2:
+; CHECK-NEXT:    movs r0, #4
+; CHECK-NEXT:    bx lr
 entry:
   %cmp = icmp eq i32 %a, %b
   %cond = select i1 %cmp, i32 4, i32 0
   ret i32 %cond
-; CHECK-LABEL: test3b:
-; CHECK-NOT: b{{(ne)|(eq)}}
-; CHECK:      subs	r0, r0, r1
-; CHECK-NEXT: rsbs	r1, r0, #0
-; CHECK-NEXT: adcs	r1, r0
-; CHECK-NEXT: lsls	r0, r1, #2
 }
 
 define i32 @test4a(i32 %a, i32 %b) {
+; CHECK-LABEL: test4a:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    cmp r0, r1
+; CHECK-NEXT:    bne .LBB6_2
+; CHECK-NEXT:  @ %bb.1: @ %entry
+; CHECK-NEXT:    movs r0, #4
+; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  .LBB6_2:
+; CHECK-NEXT:    movs r0, #0
+; CHECK-NEXT:    bx lr
 entry:
   %cmp = icmp ne i32 %a, %b
   %cond = select i1 %cmp, i32 0, i32 4
   ret i32 %cond
-; CHECK-LABEL: test4a:
-; CHECK-NOT: b{{(ne)|(eq)}}
-; CHECK:      subs	r0, r0, r1
-; CHECK-NEXT: rsbs	r1, r0, #0
-; CHECK-NEXT: adcs	r1, r0
-; CHECK-NEXT: lsls	r0, r1, #2
 }
 
 define i32 @test4b(i32 %a, i32 %b) {
+; CHECK-LABEL: test4b:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    subs r0, r0, r1
+; CHECK-NEXT:    subs r1, r0, #1
+; CHECK-NEXT:    sbcs r0, r1
+; CHECK-NEXT:    lsls r0, r0, #2
+; CHECK-NEXT:    bx lr
 entry:
   %cmp = icmp ne i32 %a, %b
   %cond = select i1 %cmp, i32 4, i32 0
   ret i32 %cond
-; CHECK-LABEL: test4b:
-; CHECK-NOT: b{{(ne)|(eq)}}
-; CHECK:       subs  r0, r0, r1
-; CHECK-NEXT:  subs  r1, r0, #1
-; CHECK-NEXT:  sbcs  r0, r1
-; CHECK-NEXT:  lsls  r0, r0, #2
 }



More information about the llvm-commits mailing list