[llvm] [WIP] [AMDGPU] [GlobalIsel] Combine Fmul with Select into ldexp instruction. (PR #120104)
Vikash Gupta via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 27 00:37:37 PST 2024
https://github.com/vg0204 updated https://github.com/llvm/llvm-project/pull/120104
>From 3f7b4082f3ef7f30a03b6886d41276a09bb58464 Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Mon, 16 Dec 2024 16:10:02 +0000
Subject: [PATCH 1/6] [AMDGPU] [GlobalIsel] Combine Fmul with Select into
ldexp.
This combine pattern perform the below transformation.
fmul x, select(y, A, B) -> ldexp (x, select i32 (y, a, b))
fmul x, select(y, -A, -B) -> ldexp ((fneg x), select i32 (y, a, b))
where, A=2^a & B=2^b ; a and b are integers.
It is a follow-up PR to implement the above combine for globalIsel,
as it has been done for SelectionDAG Isel (PR-111109)
---
llvm/lib/Target/AMDGPU/AMDGPUCombine.td | 10 +++
.../Target/AMDGPU/AMDGPUCombinerHelper.cpp | 72 +++++++++++++++++++
llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h | 4 ++
3 files changed, 86 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index 985fa8f1deff94..c1eea0ad9b7073 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -124,6 +124,16 @@ def sign_extension_in_reg : GICombineRule<
[{ return matchCombineSignExtendInReg(*${sign_inreg}, ${matchinfo}); }]),
(apply [{ applyCombineSignExtendInReg(*${sign_inreg}, ${matchinfo}); }])>;
+// Do the following combines :
+// fmul x, select(y, A, B) -> ldexp (x, select i32 (y, a, b))
+// fmul x, select(y, -A, -B) -> ldexp ((fneg x), select i32 (y, a, b))
+def combine_fmul_with_select_to_ldexp : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_FMUL $dst, $x, $select):$root,
+ (G_SELECT $select, $y, $A, $B):$sel,
+ [{ return Helper.matchCombineFmulWithSelectToLdexp(*${root}, *${sel}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
let Predicates = [Has16BitInsts, NotHasMed3_16] in {
// For gfx8, expand f16-fmed3-as-f32 into a min/max f16 sequence. This
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
index e5a376ab7357c1..d582ee892a481e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
@@ -445,3 +445,75 @@ void AMDGPUCombinerHelper::applyExpandPromotedF16FMed3(MachineInstr &MI,
Builder.buildFMinNumIEEE(MI.getOperand(0), B1, C1);
MI.eraseFromParent();
}
+
+bool AMDGPUCombinerHelper::matchCombineFmulWithSelectToLdexp(
+ MachineInstr &MI, MachineInstr &Sel,
+ std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FMUL);
+ assert(Sel.getOpcode() == TargetOpcode::G_SELECT);
+
+ Register Dst = MI.getOperand(0).getReg();
+ LLT DestTy = MRI.getType(Dst);
+ LLT ScalarDestTy = DestTy.getScalarType();
+
+ if ((ScalarDestTy == LLT::float64() || ScalarDestTy == LLT::float32() ||
+ ScalarDestTy == LLT::float16()) &&
+ (MRI.hasOneNonDBGUse(Sel.getOperand(0).getReg()))) {
+ Register SelectCond = Sel.getOperand(1).getReg();
+ Register SelectTrue = Sel.getOperand(2).getReg();
+ Register SelectFalse = Sel.getOperand(3).getReg();
+
+ const auto SelectTrueCst =
+ DestTy.isVector()
+ ? getFConstantSplat(SelectTrue, MRI, /* allowUndef */ true)
+ : getFConstantVRegValWithLookThrough(SelectTrue, MRI);
+ if (!SelectTrueCst)
+ return false;
+ const auto SelectFalseCst =
+ DestTy.isVector()
+ ? getFConstantSplat(SelectFalse, MRI, /* allowUndef */ true)
+ : getFConstantVRegValWithLookThrough(SelectFalse, MRI);
+ if (!SelectFalseCst)
+ return false;
+
+ if (SelectTrueCst->Value.isNegative() != SelectFalseCst->Value.isNegative())
+ return false;
+
+ // For f32, only non-inline constants should be transformed.
+ const SIInstrInfo *TII =
+ (MI.getMF()->getSubtarget<GCNSubtarget>()).getInstrInfo();
+ if (ScalarDestTy == LLT::float32() &&
+ TII->isInlineConstant(SelectTrueCst->Value) &&
+ TII->isInlineConstant(SelectFalseCst->Value))
+ return false;
+
+ int SelectTrueVal = SelectTrueCst->Value.getExactLog2Abs();
+ if (SelectTrueVal == INT_MIN)
+ return false;
+ int SelectFalseVal = SelectFalseCst->Value.getExactLog2Abs();
+ if (SelectFalseVal == INT_MIN)
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &Builder) {
+ LLT IntDestTy = DestTy.changeElementType(LLT::scalar(32));
+ auto NewSel =
+ Builder.buildSelect(IntDestTy, SelectCond,
+ Builder.buildConstant(IntDestTy, SelectTrueVal),
+ Builder.buildConstant(IntDestTy, SelectFalseVal));
+
+ if (SelectTrueCst->Value.isNegative()) {
+ auto NegX = Builder.buildFNeg(
+ DestTy, MI.getOperand(1).getReg(),
+ MRI.getVRegDef(MI.getOperand(1).getReg())->getFlags());
+ Builder.buildFLdexp(Dst, NegX, NewSel, MI.getFlags());
+ } else {
+ Builder.buildFLdexp(Dst, MI.getOperand(1).getReg(), NewSel,
+ MI.getFlags());
+ }
+ };
+
+ return true;
+ }
+
+ return false;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h
index 6510abe9d23218..df03a9435b3849 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h
@@ -30,6 +30,10 @@ class AMDGPUCombinerHelper : public CombinerHelper {
Register Src1, Register Src2);
void applyExpandPromotedF16FMed3(MachineInstr &MI, Register Src0,
Register Src1, Register Src2);
+
+ bool matchCombineFmulWithSelectToLdexp(
+ MachineInstr &MI, MachineInstr &Sel,
+ std::function<void(MachineIRBuilder &)> &MatchInfo);
};
} // namespace llvm
>From 5bb2713d2fe3ecba979fdf3ac3867174031dbf1f Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Wed, 18 Dec 2024 06:54:22 +0000
Subject: [PATCH 2/6] Invoking the fmulCombine in both the
AMDGPUPreLegalCombine and AMDGPUPostLegalCombiner.
---
llvm/lib/Target/AMDGPU/AMDGPUCombine.td | 14 +++++++-------
llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp | 6 +++---
llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h | 2 +-
3 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index c1eea0ad9b7073..da47aaf8a3b5c9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -125,14 +125,14 @@ def sign_extension_in_reg : GICombineRule<
(apply [{ applyCombineSignExtendInReg(*${sign_inreg}, ${matchinfo}); }])>;
// Do the following combines :
-// fmul x, select(y, A, B) -> ldexp (x, select i32 (y, a, b))
-// fmul x, select(y, -A, -B) -> ldexp ((fneg x), select i32 (y, a, b))
-def combine_fmul_with_select_to_ldexp : GICombineRule<
+// fmul x, select(y, A, B) -> fldexp (x, select i32 (y, a, b))
+// fmul x, select(y, -A, -B) -> fldexp ((fneg x), select i32 (y, a, b))
+def combine_fmul_with_select_to_fldexp : GICombineRule<
(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (G_FMUL $dst, $x, $select):$root,
(G_SELECT $select, $y, $A, $B):$sel,
- [{ return Helper.matchCombineFmulWithSelectToLdexp(*${root}, *${sel}, ${matchinfo}); }]),
- (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+ [{ return Helper.matchCombineFmulWithSelectToFldexp(*${root}, *${sel}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
let Predicates = [Has16BitInsts, NotHasMed3_16] in {
@@ -163,13 +163,13 @@ def gfx8_combines : GICombineGroup<[expand_promoted_fmed3]>;
def AMDGPUPreLegalizerCombiner: GICombiner<
"AMDGPUPreLegalizerCombinerImpl",
- [all_combines, clamp_i64_to_i16, foldable_fneg]> {
+ [all_combines, combine_fmul_with_select_to_fldexp, clamp_i64_to_i16, foldable_fneg]> {
let CombineAllMethodName = "tryCombineAllImpl";
}
def AMDGPUPostLegalizerCombiner: GICombiner<
"AMDGPUPostLegalizerCombinerImpl",
- [all_combines, gfx6gfx7_combines, gfx8_combines,
+ [all_combines, gfx6gfx7_combines, gfx8_combines, combine_fmul_with_select_to_fldexp,
uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize, foldable_fneg,
rcp_sqrt_to_rsq, fdiv_by_sqrt_to_rsq_f16, sign_extension_in_reg, smulu64]> {
let CombineAllMethodName = "tryCombineAllImpl";
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
index d582ee892a481e..06da76d5049e5a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
@@ -446,7 +446,7 @@ void AMDGPUCombinerHelper::applyExpandPromotedF16FMed3(MachineInstr &MI,
MI.eraseFromParent();
}
-bool AMDGPUCombinerHelper::matchCombineFmulWithSelectToLdexp(
+bool AMDGPUCombinerHelper::matchCombineFmulWithSelectToFldexp(
MachineInstr &MI, MachineInstr &Sel,
std::function<void(MachineIRBuilder &)> &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_FMUL);
@@ -465,13 +465,13 @@ bool AMDGPUCombinerHelper::matchCombineFmulWithSelectToLdexp(
const auto SelectTrueCst =
DestTy.isVector()
- ? getFConstantSplat(SelectTrue, MRI, /* allowUndef */ true)
+ ? getFConstantSplat(SelectTrue, MRI, /*allowUndef=*/false)
: getFConstantVRegValWithLookThrough(SelectTrue, MRI);
if (!SelectTrueCst)
return false;
const auto SelectFalseCst =
DestTy.isVector()
- ? getFConstantSplat(SelectFalse, MRI, /* allowUndef */ true)
+ ? getFConstantSplat(SelectFalse, MRI, /*allowUndef=*/false)
: getFConstantVRegValWithLookThrough(SelectFalse, MRI);
if (!SelectFalseCst)
return false;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h
index df03a9435b3849..9a0a4205ed54be 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h
@@ -31,7 +31,7 @@ class AMDGPUCombinerHelper : public CombinerHelper {
void applyExpandPromotedF16FMed3(MachineInstr &MI, Register Src0,
Register Src1, Register Src2);
- bool matchCombineFmulWithSelectToLdexp(
+ bool matchCombineFmulWithSelectToFldexp(
MachineInstr &MI, MachineInstr &Sel,
std::function<void(MachineIRBuilder &)> &MatchInfo);
};
>From b6e7c69211daf7622003b022e3b24d601eb25428 Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Wed, 18 Dec 2024 07:06:17 +0000
Subject: [PATCH 3/6] updated the testCases, and increased the coverage of
dagCombine-fmul-to-ldexp to include globalIsel in pipeline as well.
---
llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll | 769 +--
.../CodeGen/AMDGPU/GlobalISel/llvm.powi.ll | 95 +-
.../CodeGen/AMDGPU/dagcombine-fmul-sel.ll | 5208 ++++++++++-------
llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll | 10 +-
llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll | 2114 ++++---
llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll | 28 +-
llvm/test/CodeGen/AMDGPU/llvm.exp2.ll | 1230 ++--
llvm/test/CodeGen/AMDGPU/llvm.log.ll | 1209 ++--
llvm/test/CodeGen/AMDGPU/llvm.log10.ll | 1209 ++--
llvm/test/CodeGen/AMDGPU/llvm.log2.ll | 1542 +++--
.../AMDGPU/pseudo-scalar-transcendental.ll | 97 +-
11 files changed, 7952 insertions(+), 5559 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll
index 0577117e9d9e1d..d81faf91801b0b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll
@@ -10,10 +10,10 @@ define float @v_pow_f32(float %x, float %y) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX6-NEXT: v_mov_b32_e32 v3, 0x4f800000
; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX6-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
-; GFX6-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX6-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v2
; GFX6-NEXT: v_log_f32_e32 v0, v0
; GFX6-NEXT: v_mov_b32_e32 v2, 0x42000000
; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
@@ -25,19 +25,19 @@ define float @v_pow_f32(float %x, float %y) {
; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX6-NEXT: v_add_f32_e32 v0, v0, v1
; GFX6-NEXT: v_exp_f32_e32 v0, v0
-; GFX6-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX6-NEXT: v_not_b32_e32 v1, 63
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX8-NEXT: v_mov_b32_e32 v3, 0x4f800000
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
-; GFX8-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX8-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX8-NEXT: v_ldexp_f32 v0, v0, v2
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000
; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
@@ -49,19 +49,19 @@ define float @v_pow_f32(float %x, float %y) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX8-NEXT: v_add_f32_e32 v0, v0, v1
; GFX8-NEXT: v_exp_f32_e32 v0, v0
-; GFX8-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX8-NEXT: v_not_b32_e32 v1, 63
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX8-NEXT: v_ldexp_f32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x4f800000
; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX9-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX9-NEXT: v_ldexp_f32 v0, v0, v2
; GFX9-NEXT: v_log_f32_e32 v0, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000
; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
@@ -73,17 +73,18 @@ define float @v_pow_f32(float %x, float %y) {
; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX9-NEXT: v_add_f32_e32 v0, v0, v1
; GFX9-NEXT: v_exp_f32_e32 v0, v0
-; GFX9-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: v_not_b32_e32 v1, 63
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX9-NEXT: v_ldexp_f32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX10-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX10-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX10-NEXT: v_ldexp_f32 v0, v0, v2
; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo
; GFX10-NEXT: v_log_f32_e32 v0, v0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
@@ -91,32 +92,34 @@ define float @v_pow_f32(float %x, float %y) {
; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
; GFX10-NEXT: v_add_f32_e32 v0, v0, v1
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo
; GFX10-NEXT: v_exp_f32_e32 v0, v0
-; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX10-NEXT: v_ldexp_f32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_pow_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX11-NEXT: v_ldexp_f32 v0, v0, v2
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_log_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v0, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_exp_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX11-NEXT: v_ldexp_f32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%pow = call float @llvm.pow.f32(float %x, float %y)
ret float %pow
@@ -127,111 +130,114 @@ define <2 x float> @v_pow_v2f32(<2 x float> %x, <2 x float> %y) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v4, 0x800000
-; GFX6-NEXT: v_mov_b32_e32 v5, 0x4f800000
; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
-; GFX6-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc
+; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; GFX6-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v4
-; GFX6-NEXT: v_mul_f32_e32 v0, v0, v6
-; GFX6-NEXT: v_cndmask_b32_e64 v4, 1.0, v5, s[4:5]
+; GFX6-NEXT: v_lshlrev_b32_e32 v5, 5, v5
+; GFX6-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
+; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v5
+; GFX6-NEXT: v_lshlrev_b32_e32 v4, 5, v4
; GFX6-NEXT: v_log_f32_e32 v0, v0
-; GFX6-NEXT: v_mul_f32_e32 v1, v1, v4
+; GFX6-NEXT: v_ldexp_f32_e32 v1, v1, v4
; GFX6-NEXT: v_log_f32_e32 v1, v1
-; GFX6-NEXT: v_mov_b32_e32 v6, 0x42000000
-; GFX6-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc
-; GFX6-NEXT: v_sub_f32_e32 v0, v0, v7
-; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, v6, s[4:5]
+; GFX6-NEXT: v_mov_b32_e32 v5, 0x42000000
+; GFX6-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc
+; GFX6-NEXT: v_sub_f32_e32 v0, v0, v6
+; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, v5, s[4:5]
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v2
; GFX6-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
; GFX6-NEXT: v_sub_f32_e32 v1, v1, v5
-; GFX6-NEXT: v_mov_b32_e32 v7, 0x42800000
+; GFX6-NEXT: v_mov_b32_e32 v6, 0x42800000
; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v3
-; GFX6-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc
; GFX6-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v2
-; GFX6-NEXT: v_add_f32_e32 v0, v0, v8
-; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, v7, s[4:5]
+; GFX6-NEXT: v_add_f32_e32 v0, v0, v7
+; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[4:5]
; GFX6-NEXT: v_exp_f32_e32 v0, v0
; GFX6-NEXT: v_add_f32_e32 v1, v1, v2
; GFX6-NEXT: v_exp_f32_e32 v1, v1
-; GFX6-NEXT: v_mov_b32_e32 v4, 0x1f800000
-; GFX6-NEXT: v_cndmask_b32_e32 v2, 1.0, v4, vcc
-; GFX6-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX6-NEXT: v_cndmask_b32_e64 v2, 1.0, v4, s[4:5]
-; GFX6-NEXT: v_mul_f32_e32 v1, v1, v2
+; GFX6-NEXT: v_not_b32_e32 v4, 63
+; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v2
+; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[4:5]
+; GFX6-NEXT: v_ldexp_f32_e32 v1, v1, v2
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v4, 0x800000
-; GFX8-NEXT: v_mov_b32_e32 v5, 0x4f800000
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
-; GFX8-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; GFX8-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v4
-; GFX8-NEXT: v_mul_f32_e32 v0, v0, v6
-; GFX8-NEXT: v_cndmask_b32_e64 v4, 1.0, v5, s[4:5]
+; GFX8-NEXT: v_lshlrev_b32_e32 v5, 5, v5
+; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
+; GFX8-NEXT: v_ldexp_f32 v0, v0, v5
+; GFX8-NEXT: v_lshlrev_b32_e32 v4, 5, v4
; GFX8-NEXT: v_log_f32_e32 v0, v0
-; GFX8-NEXT: v_mul_f32_e32 v1, v1, v4
+; GFX8-NEXT: v_ldexp_f32 v1, v1, v4
; GFX8-NEXT: v_log_f32_e32 v1, v1
-; GFX8-NEXT: v_mov_b32_e32 v6, 0x42000000
-; GFX8-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc
-; GFX8-NEXT: v_sub_f32_e32 v0, v0, v7
-; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, v6, s[4:5]
+; GFX8-NEXT: v_mov_b32_e32 v5, 0x42000000
+; GFX8-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc
+; GFX8-NEXT: v_sub_f32_e32 v0, v0, v6
+; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, v5, s[4:5]
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v2
; GFX8-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
; GFX8-NEXT: v_sub_f32_e32 v1, v1, v5
-; GFX8-NEXT: v_mov_b32_e32 v7, 0x42800000
+; GFX8-NEXT: v_mov_b32_e32 v6, 0x42800000
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
; GFX8-NEXT: v_mul_legacy_f32_e32 v1, v1, v3
-; GFX8-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc
; GFX8-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v2
-; GFX8-NEXT: v_add_f32_e32 v0, v0, v8
-; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v7, s[4:5]
+; GFX8-NEXT: v_add_f32_e32 v0, v0, v7
+; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[4:5]
; GFX8-NEXT: v_exp_f32_e32 v0, v0
; GFX8-NEXT: v_add_f32_e32 v1, v1, v2
; GFX8-NEXT: v_exp_f32_e32 v1, v1
-; GFX8-NEXT: v_mov_b32_e32 v4, 0x1f800000
-; GFX8-NEXT: v_cndmask_b32_e32 v2, 1.0, v4, vcc
-; GFX8-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX8-NEXT: v_cndmask_b32_e64 v2, 1.0, v4, s[4:5]
-; GFX8-NEXT: v_mul_f32_e32 v1, v1, v2
+; GFX8-NEXT: v_not_b32_e32 v4, 63
+; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; GFX8-NEXT: v_ldexp_f32 v0, v0, v2
+; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[4:5]
+; GFX8-NEXT: v_ldexp_f32 v1, v1, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v4, 0x800000
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x4f800000
; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v4
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v6
-; GFX9-NEXT: v_cndmask_b32_e64 v4, 1.0, v5, s[4:5]
+; GFX9-NEXT: v_lshlrev_b32_e32 v5, 5, v5
+; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
+; GFX9-NEXT: v_ldexp_f32 v0, v0, v5
+; GFX9-NEXT: v_lshlrev_b32_e32 v4, 5, v4
; GFX9-NEXT: v_log_f32_e32 v0, v0
-; GFX9-NEXT: v_mul_f32_e32 v1, v1, v4
+; GFX9-NEXT: v_ldexp_f32 v1, v1, v4
; GFX9-NEXT: v_log_f32_e32 v1, v1
-; GFX9-NEXT: v_mov_b32_e32 v6, 0x42000000
-; GFX9-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc
-; GFX9-NEXT: v_sub_f32_e32 v0, v0, v7
-; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, v6, s[4:5]
+; GFX9-NEXT: v_mov_b32_e32 v5, 0x42000000
+; GFX9-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc
+; GFX9-NEXT: v_sub_f32_e32 v0, v0, v6
+; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, v5, s[4:5]
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
; GFX9-NEXT: v_sub_f32_e32 v1, v1, v5
-; GFX9-NEXT: v_mov_b32_e32 v7, 0x42800000
+; GFX9-NEXT: v_mov_b32_e32 v6, 0x42800000
; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
; GFX9-NEXT: v_mul_legacy_f32_e32 v1, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc
; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v2
-; GFX9-NEXT: v_add_f32_e32 v0, v0, v8
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, v7, s[4:5]
+; GFX9-NEXT: v_add_f32_e32 v0, v0, v7
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[4:5]
; GFX9-NEXT: v_exp_f32_e32 v0, v0
; GFX9-NEXT: v_add_f32_e32 v1, v1, v2
; GFX9-NEXT: v_exp_f32_e32 v1, v1
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x1f800000
-; GFX9-NEXT: v_cndmask_b32_e32 v2, 1.0, v4, vcc
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 1.0, v4, s[4:5]
-; GFX9-NEXT: v_mul_f32_e32 v1, v1, v2
+; GFX9-NEXT: v_not_b32_e32 v4, 63
+; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; GFX9-NEXT: v_ldexp_f32 v0, v0, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[4:5]
+; GFX9-NEXT: v_ldexp_f32 v1, v1, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_v2f32:
@@ -239,10 +245,12 @@ define <2 x float> @v_pow_v2f32(<2 x float> %x, <2 x float> %y) {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX10-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, v1
-; GFX10-NEXT: v_cndmask_b32_e64 v4, 1.0, 0x4f800000, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v5, 1.0, 0x4f800000, s4
-; GFX10-NEXT: v_mul_f32_e32 v0, v0, v4
-; GFX10-NEXT: v_mul_f32_e32 v1, v1, v5
+; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s4
+; GFX10-NEXT: v_lshlrev_b32_e32 v4, 5, v4
+; GFX10-NEXT: v_lshlrev_b32_e32 v5, 5, v5
+; GFX10-NEXT: v_ldexp_f32 v0, v0, v4
+; GFX10-NEXT: v_ldexp_f32 v1, v1, v5
; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 0x42000000, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 0x42000000, s4
; GFX10-NEXT: v_log_f32_e32 v0, v0
@@ -257,46 +265,54 @@ define <2 x float> @v_pow_v2f32(<2 x float> %x, <2 x float> %y) {
; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 0x42800000, s4
; GFX10-NEXT: v_add_f32_e32 v0, v0, v2
; GFX10-NEXT: v_add_f32_e32 v1, v1, v3
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x1f800000, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x1f800000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 0xffffffc0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 0xffffffc0, s4
; GFX10-NEXT: v_exp_f32_e32 v0, v0
; GFX10-NEXT: v_exp_f32_e32 v1, v1
-; GFX10-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX10-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX10-NEXT: v_ldexp_f32 v0, v0, v2
+; GFX10-NEXT: v_ldexp_f32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_pow_v2f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX11-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v4, 1.0, 0x4f800000, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e64 v5, 1.0, 0x4f800000, s0
-; GFX11-NEXT: v_dual_mul_f32 v0, v0, v4 :: v_dual_mul_f32 v1, v1, v5
-; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, 0x42000000, vcc_lo
+; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
+; GFX11-NEXT: v_lshlrev_b32_e32 v5, 5, v5
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_ldexp_f32 v1, v1, v5
; GFX11-NEXT: v_cndmask_b32_e64 v5, 0, 0x42000000, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_log_f32_e32 v0, v0
; GFX11-NEXT: v_log_f32_e32 v1, v1
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_dual_sub_f32 v0, v0, v4 :: v_dual_sub_f32 v1, v1, v5
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_dual_mul_dx9_zero_f32 v0, v0, v2 :: v_dual_mul_dx9_zero_f32 v1, v1, v3
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_dual_sub_f32 v1, v1, v5 :: v_dual_lshlrev_b32 v4, 5, v4
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_ldexp_f32 v0, v0, v4
+; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, 0x42000000, vcc_lo
+; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v1, v1, v3
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_log_f32_e32 v0, v0
; GFX11-NEXT: v_cmp_gt_f32_e64 s0, 0xc2fc0000, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42800000, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 0x42800000, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x1f800000, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x1f800000, s0
-; GFX11-NEXT: v_exp_f32_e32 v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NEXT: v_dual_sub_f32 v0, v0, v4 :: v_dual_add_f32 v1, v1, v3
+; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v0, v2
+; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 0xffffffc0, s0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_exp_f32_e32 v1, v1
+; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42800000, vcc_lo
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3
+; GFX11-NEXT: v_ldexp_f32 v1, v1, v3
+; GFX11-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0xffffffc0, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_exp_f32_e32 v0, v0
+; GFX11-NEXT: s_waitcnt_depctr 0xfff
+; GFX11-NEXT: v_ldexp_f32 v0, v0, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%pow = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> %y)
ret <2 x float> %pow
@@ -316,9 +332,9 @@ define half @v_pow_f16(half %x, half %y) {
; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
; GFX6-NEXT: v_add_f32_e32 v0, v0, v1
; GFX6-NEXT: v_exp_f32_e32 v0, v0
-; GFX6-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX6-NEXT: v_not_b32_e32 v1, 63
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
@@ -388,18 +404,18 @@ define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) {
; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc
; GFX6-NEXT: v_add_f32_e32 v0, v0, v2
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v3
-; GFX6-NEXT: v_mov_b32_e32 v3, 0x1f800000
-; GFX6-NEXT: v_cndmask_b32_e32 v6, 1.0, v3, vcc
+; GFX6-NEXT: v_not_b32_e32 v3, 63
+; GFX6-NEXT: v_cndmask_b32_e32 v6, 0, v3, vcc
; GFX6-NEXT: v_exp_f32_e32 v0, v0
; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v2
; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4
; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc
; GFX6-NEXT: v_add_f32_e32 v1, v1, v2
; GFX6-NEXT: v_exp_f32_e32 v1, v1
-; GFX6-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
-; GFX6-NEXT: v_mul_f32_e32 v0, v0, v6
+; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v3, vcc
+; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v6
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX6-NEXT: v_mul_f32_e32 v1, v1, v2
+; GFX6-NEXT: v_ldexp_f32_e32 v1, v1, v2
; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
@@ -508,17 +524,17 @@ define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) {
; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2
; GFX6-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
; GFX6-NEXT: v_add_f32_e32 v1, v1, v5
-; GFX6-NEXT: v_mov_b32_e32 v5, 0x1f800000
+; GFX6-NEXT: v_not_b32_e32 v5, 63
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v3
-; GFX6-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc
; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
; GFX6-NEXT: v_exp_f32_e32 v1, v1
; GFX6-NEXT: v_add_f32_e32 v0, v0, v2
; GFX6-NEXT: v_exp_f32_e32 v2, v0
-; GFX6-NEXT: v_mul_f32_e32 v0, v1, v6
-; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v5, vcc
-; GFX6-NEXT: v_mul_f32_e32 v1, v2, v1
+; GFX6-NEXT: v_ldexp_f32_e32 v0, v1, v6
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc
+; GFX6-NEXT: v_ldexp_f32_e32 v1, v2, v1
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
@@ -634,17 +650,17 @@ define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) {
; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
; GFX6-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
; GFX6-NEXT: v_add_f32_e32 v0, v0, v5
-; GFX6-NEXT: v_mov_b32_e32 v5, 0x1f800000
+; GFX6-NEXT: v_not_b32_e32 v5, 63
; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v2
-; GFX6-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc
; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3
; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
; GFX6-NEXT: v_add_f32_e32 v1, v1, v2
; GFX6-NEXT: v_exp_f32_e32 v0, v0
; GFX6-NEXT: v_exp_f32_e32 v1, v1
-; GFX6-NEXT: v_cndmask_b32_e32 v2, 1.0, v5, vcc
-; GFX6-NEXT: v_mul_f32_e32 v0, v0, v6
-; GFX6-NEXT: v_mul_f32_e32 v1, v1, v2
+; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc
+; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v6
+; GFX6-NEXT: v_ldexp_f32_e32 v1, v1, v2
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
@@ -764,17 +780,17 @@ define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) {
; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v2, v3
; GFX6-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
; GFX6-NEXT: v_add_f32_e32 v2, v2, v5
-; GFX6-NEXT: v_mov_b32_e32 v5, 0x1f800000
+; GFX6-NEXT: v_not_b32_e32 v5, 63
; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
-; GFX6-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc
; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc
; GFX6-NEXT: v_exp_f32_e32 v2, v2
; GFX6-NEXT: v_add_f32_e32 v0, v0, v1
; GFX6-NEXT: v_exp_f32_e32 v1, v0
-; GFX6-NEXT: v_mul_f32_e32 v0, v2, v6
-; GFX6-NEXT: v_cndmask_b32_e32 v2, 1.0, v5, vcc
-; GFX6-NEXT: v_mul_f32_e32 v1, v1, v2
+; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v6
+; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc
+; GFX6-NEXT: v_ldexp_f32_e32 v1, v1, v2
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
@@ -885,10 +901,10 @@ define float @v_pow_f32_fabs_lhs(float %x, float %y) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX6-NEXT: v_mov_b32_e32 v3, 0x4f800000
; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
-; GFX6-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
-; GFX6-NEXT: v_mul_f32_e64 v0, |v0|, v2
+; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX6-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX6-NEXT: v_ldexp_f32_e64 v0, |v0|, v2
; GFX6-NEXT: v_log_f32_e32 v0, v0
; GFX6-NEXT: v_mov_b32_e32 v2, 0x42000000
; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
@@ -900,19 +916,19 @@ define float @v_pow_f32_fabs_lhs(float %x, float %y) {
; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX6-NEXT: v_add_f32_e32 v0, v0, v1
; GFX6-NEXT: v_exp_f32_e32 v0, v0
-; GFX6-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX6-NEXT: v_not_b32_e32 v1, 63
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fabs_lhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX8-NEXT: v_mov_b32_e32 v3, 0x4f800000
; GFX8-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
-; GFX8-NEXT: v_mul_f32_e64 v0, |v0|, v2
+; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX8-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX8-NEXT: v_ldexp_f32 v0, |v0|, v2
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000
; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
@@ -924,19 +940,19 @@ define float @v_pow_f32_fabs_lhs(float %x, float %y) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX8-NEXT: v_add_f32_e32 v0, v0, v1
; GFX8-NEXT: v_exp_f32_e32 v0, v0
-; GFX8-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX8-NEXT: v_not_b32_e32 v1, 63
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX8-NEXT: v_ldexp_f32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fabs_lhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x4f800000
; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
-; GFX9-NEXT: v_mul_f32_e64 v0, |v0|, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX9-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX9-NEXT: v_ldexp_f32 v0, |v0|, v2
; GFX9-NEXT: v_log_f32_e32 v0, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000
; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
@@ -948,17 +964,18 @@ define float @v_pow_f32_fabs_lhs(float %x, float %y) {
; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX9-NEXT: v_add_f32_e32 v0, v0, v1
; GFX9-NEXT: v_exp_f32_e32 v0, v0
-; GFX9-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: v_not_b32_e32 v1, 63
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX9-NEXT: v_ldexp_f32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32_fabs_lhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, |v0|
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s4
-; GFX10-NEXT: v_mul_f32_e64 v0, |v0|, v2
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
+; GFX10-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX10-NEXT: v_ldexp_f32 v0, |v0|, v2
; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s4
; GFX10-NEXT: v_log_f32_e32 v0, v0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
@@ -966,9 +983,9 @@ define float @v_pow_f32_fabs_lhs(float %x, float %y) {
; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
; GFX10-NEXT: v_add_f32_e32 v0, v0, v1
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo
; GFX10-NEXT: v_exp_f32_e32 v0, v0
-; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX10-NEXT: v_ldexp_f32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_pow_f32_fabs_lhs:
@@ -976,23 +993,24 @@ define float @v_pow_f32_fabs_lhs(float %x, float %y) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0
-; GFX11-NEXT: v_mul_f32_e64 v0, |v0|, v2
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_ldexp_f32 v0, |v0|, v2
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_log_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v0, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo
; GFX11-NEXT: v_exp_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX11-NEXT: v_ldexp_f32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fabs.x = call float @llvm.fabs.f32(float %x)
%pow = call float @llvm.pow.f32(float %fabs.x, float %y)
@@ -1004,10 +1022,10 @@ define float @v_pow_f32_fabs_rhs(float %x, float %y) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX6-NEXT: v_mov_b32_e32 v3, 0x4f800000
; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX6-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
-; GFX6-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX6-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v2
; GFX6-NEXT: v_log_f32_e32 v0, v0
; GFX6-NEXT: v_mov_b32_e32 v2, 0x42000000
; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
@@ -1019,19 +1037,19 @@ define float @v_pow_f32_fabs_rhs(float %x, float %y) {
; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX6-NEXT: v_add_f32_e32 v0, v0, v1
; GFX6-NEXT: v_exp_f32_e32 v0, v0
-; GFX6-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX6-NEXT: v_not_b32_e32 v1, 63
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fabs_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX8-NEXT: v_mov_b32_e32 v3, 0x4f800000
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
-; GFX8-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX8-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX8-NEXT: v_ldexp_f32 v0, v0, v2
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000
; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
@@ -1043,19 +1061,19 @@ define float @v_pow_f32_fabs_rhs(float %x, float %y) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX8-NEXT: v_add_f32_e32 v0, v0, v1
; GFX8-NEXT: v_exp_f32_e32 v0, v0
-; GFX8-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX8-NEXT: v_not_b32_e32 v1, 63
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX8-NEXT: v_ldexp_f32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fabs_rhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x4f800000
; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX9-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX9-NEXT: v_ldexp_f32 v0, v0, v2
; GFX9-NEXT: v_log_f32_e32 v0, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000
; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
@@ -1067,17 +1085,18 @@ define float @v_pow_f32_fabs_rhs(float %x, float %y) {
; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX9-NEXT: v_add_f32_e32 v0, v0, v1
; GFX9-NEXT: v_exp_f32_e32 v0, v0
-; GFX9-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: v_not_b32_e32 v1, 63
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX9-NEXT: v_ldexp_f32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32_fabs_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX10-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX10-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX10-NEXT: v_ldexp_f32 v0, v0, v2
; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo
; GFX10-NEXT: v_log_f32_e32 v0, v0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
@@ -1085,32 +1104,34 @@ define float @v_pow_f32_fabs_rhs(float %x, float %y) {
; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
; GFX10-NEXT: v_add_f32_e32 v0, v0, v1
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo
; GFX10-NEXT: v_exp_f32_e32 v0, v0
-; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX10-NEXT: v_ldexp_f32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_pow_f32_fabs_rhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX11-NEXT: v_ldexp_f32 v0, v0, v2
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_log_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_dx9_zero_f32_e64 v0, v0, |v1|
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_exp_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX11-NEXT: v_ldexp_f32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fabs.y = call float @llvm.fabs.f32(float %y)
%pow = call float @llvm.pow.f32(float %x, float %fabs.y)
@@ -1122,10 +1143,10 @@ define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX6-NEXT: v_mov_b32_e32 v3, 0x4f800000
; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
-; GFX6-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
-; GFX6-NEXT: v_mul_f32_e64 v0, |v0|, v2
+; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX6-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX6-NEXT: v_ldexp_f32_e64 v0, |v0|, v2
; GFX6-NEXT: v_log_f32_e32 v0, v0
; GFX6-NEXT: v_mov_b32_e32 v2, 0x42000000
; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
@@ -1137,19 +1158,19 @@ define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) {
; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX6-NEXT: v_add_f32_e32 v0, v0, v1
; GFX6-NEXT: v_exp_f32_e32 v0, v0
-; GFX6-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX6-NEXT: v_not_b32_e32 v1, 63
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX8-NEXT: v_mov_b32_e32 v3, 0x4f800000
; GFX8-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
-; GFX8-NEXT: v_mul_f32_e64 v0, |v0|, v2
+; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX8-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX8-NEXT: v_ldexp_f32 v0, |v0|, v2
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000
; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
@@ -1161,19 +1182,19 @@ define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX8-NEXT: v_add_f32_e32 v0, v0, v1
; GFX8-NEXT: v_exp_f32_e32 v0, v0
-; GFX8-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX8-NEXT: v_not_b32_e32 v1, 63
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX8-NEXT: v_ldexp_f32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x4f800000
; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
-; GFX9-NEXT: v_mul_f32_e64 v0, |v0|, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX9-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX9-NEXT: v_ldexp_f32 v0, |v0|, v2
; GFX9-NEXT: v_log_f32_e32 v0, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000
; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
@@ -1185,17 +1206,18 @@ define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) {
; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX9-NEXT: v_add_f32_e32 v0, v0, v1
; GFX9-NEXT: v_exp_f32_e32 v0, v0
-; GFX9-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: v_not_b32_e32 v1, 63
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX9-NEXT: v_ldexp_f32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, |v0|
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s4
-; GFX10-NEXT: v_mul_f32_e64 v0, |v0|, v2
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
+; GFX10-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX10-NEXT: v_ldexp_f32 v0, |v0|, v2
; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s4
; GFX10-NEXT: v_log_f32_e32 v0, v0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
@@ -1203,9 +1225,9 @@ define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) {
; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
; GFX10-NEXT: v_add_f32_e32 v0, v0, v1
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo
; GFX10-NEXT: v_exp_f32_e32 v0, v0
-; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX10-NEXT: v_ldexp_f32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_pow_f32_fabs_lhs_rhs:
@@ -1213,23 +1235,24 @@ define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0
-; GFX11-NEXT: v_mul_f32_e64 v0, |v0|, v2
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_ldexp_f32 v0, |v0|, v2
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_log_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_dx9_zero_f32_e64 v0, v0, |v1|
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo
; GFX11-NEXT: v_exp_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX11-NEXT: v_ldexp_f32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fabs.x = call float @llvm.fabs.f32(float %x)
%fabs.y = call float @llvm.fabs.f32(float %y)
@@ -1241,10 +1264,10 @@ define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) {
; GFX6-LABEL: v_pow_f32_sgpr_vgpr:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX6-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1
-; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX6-NEXT: v_mul_f32_e32 v1, s0, v1
+; GFX6-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX6-NEXT: v_ldexp_f32_e32 v1, s0, v1
; GFX6-NEXT: v_log_f32_e32 v1, v1
; GFX6-NEXT: v_mov_b32_e32 v2, 0x42000000
; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
@@ -1256,18 +1279,18 @@ define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) {
; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX6-NEXT: v_add_f32_e32 v0, v0, v1
; GFX6-NEXT: v_exp_f32_e32 v0, v0
-; GFX6-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX6-NEXT: v_not_b32_e32 v1, 63
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_pow_f32_sgpr_vgpr:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX8-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1
-; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX8-NEXT: v_mul_f32_e32 v1, s0, v1
+; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX8-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX8-NEXT: v_ldexp_f32 v1, s0, v1
; GFX8-NEXT: v_log_f32_e32 v1, v1
; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000
; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
@@ -1279,18 +1302,18 @@ define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX8-NEXT: v_add_f32_e32 v0, v0, v1
; GFX8-NEXT: v_exp_f32_e32 v0, v0
-; GFX8-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX8-NEXT: v_not_b32_e32 v1, 63
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX8-NEXT: v_ldexp_f32 v0, v0, v1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: v_pow_f32_sgpr_vgpr:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX9-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX9-NEXT: v_mul_f32_e32 v1, s0, v1
+; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX9-NEXT: v_ldexp_f32 v1, s0, v1
; GFX9-NEXT: v_log_f32_e32 v1, v1
; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000
; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
@@ -1302,49 +1325,51 @@ define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) {
; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX9-NEXT: v_add_f32_e32 v0, v0, v1
; GFX9-NEXT: v_exp_f32_e32 v0, v0
-; GFX9-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: v_not_b32_e32 v1, 63
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX9-NEXT: v_ldexp_f32 v0, v0, v1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: v_pow_f32_sgpr_vgpr:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_cmp_gt_f32_e64 s1, 0x800000, s0
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s1
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s1
; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s1
-; GFX10-NEXT: v_mul_f32_e32 v1, s0, v1
+; GFX10-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX10-NEXT: v_ldexp_f32 v1, s0, v1
; GFX10-NEXT: v_log_f32_e32 v1, v1
; GFX10-NEXT: v_sub_f32_e32 v1, v1, v2
; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
; GFX10-NEXT: v_add_f32_e32 v0, v0, v1
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo
; GFX10-NEXT: v_exp_f32_e32 v0, v0
-; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX10-NEXT: v_ldexp_f32 v0, v0, v1
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: v_pow_f32_sgpr_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_cmp_gt_f32_e64 s1, 0x800000, s0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s1
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s1
-; GFX11-NEXT: v_mul_f32_e32 v1, s0, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_ldexp_f32 v1, s0, v1
; GFX11-NEXT: v_log_f32_e32 v1, v1
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_sub_f32_e32 v1, v1, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v1, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo
; GFX11-NEXT: v_exp_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX11-NEXT: v_ldexp_f32 v0, v0, v1
; GFX11-NEXT: ; return to shader part epilog
%pow = call float @llvm.pow.f32(float %x, float %y)
ret float %pow
@@ -1354,10 +1379,10 @@ define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) {
; GFX6-LABEL: v_pow_f32_vgpr_sgpr:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX6-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX6-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GFX6-NEXT: v_log_f32_e32 v0, v0
; GFX6-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
@@ -1369,18 +1394,18 @@ define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) {
; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX6-NEXT: v_add_f32_e32 v0, v0, v1
; GFX6-NEXT: v_exp_f32_e32 v0, v0
-; GFX6-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX6-NEXT: v_not_b32_e32 v1, 63
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_pow_f32_vgpr_sgpr:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX8-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX8-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX8-NEXT: v_ldexp_f32 v0, v0, v1
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
@@ -1392,18 +1417,18 @@ define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX8-NEXT: v_add_f32_e32 v0, v0, v1
; GFX8-NEXT: v_exp_f32_e32 v0, v0
-; GFX8-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX8-NEXT: v_not_b32_e32 v1, 63
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX8-NEXT: v_ldexp_f32 v0, v0, v1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: v_pow_f32_vgpr_sgpr:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX9-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX9-NEXT: v_ldexp_f32 v0, v0, v1
; GFX9-NEXT: v_log_f32_e32 v0, v0
; GFX9-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
@@ -1415,16 +1440,17 @@ define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) {
; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX9-NEXT: v_add_f32_e32 v0, v0, v1
; GFX9-NEXT: v_exp_f32_e32 v0, v0
-; GFX9-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: v_not_b32_e32 v1, 63
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX9-NEXT: v_ldexp_f32 v0, v0, v1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: v_pow_f32_vgpr_sgpr:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
-; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX10-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX10-NEXT: v_ldexp_f32 v0, v0, v1
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
; GFX10-NEXT: v_log_f32_e32 v0, v0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v1
@@ -1432,31 +1458,33 @@ define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) {
; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
; GFX10-NEXT: v_add_f32_e32 v0, v0, v1
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo
; GFX10-NEXT: v_exp_f32_e32 v0, v0
-; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX10-NEXT: v_ldexp_f32 v0, v0, v1
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: v_pow_f32_vgpr_sgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX11-NEXT: v_ldexp_f32 v0, v0, v1
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_log_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, s0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_exp_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX11-NEXT: v_ldexp_f32 v0, v0, v1
; GFX11-NEXT: ; return to shader part epilog
%pow = call float @llvm.pow.f32(float %x, float %y)
ret float %pow
@@ -1466,10 +1494,10 @@ define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) {
; GFX6-LABEL: v_pow_f32_sgpr_sgpr:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_mov_b32_e32 v0, 0x800000
-; GFX6-NEXT: v_mov_b32_e32 v1, 0x4f800000
; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0
-; GFX6-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
-; GFX6-NEXT: v_mul_f32_e32 v0, s0, v0
+; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX6-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; GFX6-NEXT: v_ldexp_f32_e32 v0, s0, v0
; GFX6-NEXT: v_log_f32_e32 v0, v0
; GFX6-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
@@ -1481,18 +1509,18 @@ define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) {
; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX6-NEXT: v_add_f32_e32 v0, v0, v1
; GFX6-NEXT: v_exp_f32_e32 v0, v0
-; GFX6-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX6-NEXT: v_not_b32_e32 v1, 63
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_pow_f32_sgpr_sgpr:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_mov_b32_e32 v0, 0x800000
-; GFX8-NEXT: v_mov_b32_e32 v1, 0x4f800000
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
-; GFX8-NEXT: v_mul_f32_e32 v0, s0, v0
+; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; GFX8-NEXT: v_ldexp_f32 v0, s0, v0
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
@@ -1504,18 +1532,18 @@ define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX8-NEXT: v_add_f32_e32 v0, v0, v1
; GFX8-NEXT: v_exp_f32_e32 v0, v0
-; GFX8-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX8-NEXT: v_not_b32_e32 v1, 63
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX8-NEXT: v_ldexp_f32 v0, v0, v1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: v_pow_f32_sgpr_sgpr:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, 0x800000
-; GFX9-NEXT: v_mov_b32_e32 v1, 0x4f800000
; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
-; GFX9-NEXT: v_mul_f32_e32 v0, s0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; GFX9-NEXT: v_ldexp_f32 v0, s0, v0
; GFX9-NEXT: v_log_f32_e32 v0, v0
; GFX9-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
@@ -1527,49 +1555,51 @@ define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) {
; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX9-NEXT: v_add_f32_e32 v0, v0, v1
; GFX9-NEXT: v_exp_f32_e32 v0, v0
-; GFX9-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: v_not_b32_e32 v1, 63
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX9-NEXT: v_ldexp_f32 v0, v0, v1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: v_pow_f32_sgpr_sgpr:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_cmp_gt_f32_e64 s2, 0x800000, s0
-; GFX10-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s2
+; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s2
-; GFX10-NEXT: v_mul_f32_e32 v0, s0, v0
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; GFX10-NEXT: v_ldexp_f32 v0, s0, v0
; GFX10-NEXT: v_log_f32_e32 v0, v0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX10-NEXT: v_mul_legacy_f32_e32 v0, s1, v0
; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
; GFX10-NEXT: v_add_f32_e32 v0, v0, v1
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo
; GFX10-NEXT: v_exp_f32_e32 v0, v0
-; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX10-NEXT: v_ldexp_f32 v0, v0, v1
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: v_pow_f32_sgpr_sgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_cmp_gt_f32_e64 s2, 0x800000, s0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s2
-; GFX11-NEXT: v_mul_f32_e32 v0, s0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_ldexp_f32 v0, s0, v0
; GFX11-NEXT: v_log_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, s1, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo
; GFX11-NEXT: v_exp_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX11-NEXT: v_ldexp_f32 v0, v0, v1
; GFX11-NEXT: ; return to shader part epilog
%pow = call float @llvm.pow.f32(float %x, float %y)
ret float %pow
@@ -1580,10 +1610,10 @@ define float @v_pow_f32_fneg_lhs(float %x, float %y) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX6-NEXT: v_mov_b32_e32 v3, 0x4f800000
; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v2
-; GFX6-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
-; GFX6-NEXT: v_mul_f32_e64 v0, -v0, v2
+; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX6-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX6-NEXT: v_ldexp_f32_e64 v0, -v0, v2
; GFX6-NEXT: v_log_f32_e32 v0, v0
; GFX6-NEXT: v_mov_b32_e32 v2, 0x42000000
; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
@@ -1595,19 +1625,19 @@ define float @v_pow_f32_fneg_lhs(float %x, float %y) {
; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX6-NEXT: v_add_f32_e32 v0, v0, v1
; GFX6-NEXT: v_exp_f32_e32 v0, v0
-; GFX6-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX6-NEXT: v_not_b32_e32 v1, 63
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fneg_lhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX8-NEXT: v_mov_b32_e32 v3, 0x4f800000
; GFX8-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
-; GFX8-NEXT: v_mul_f32_e64 v0, -v0, v2
+; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX8-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX8-NEXT: v_ldexp_f32 v0, -v0, v2
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000
; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
@@ -1619,19 +1649,19 @@ define float @v_pow_f32_fneg_lhs(float %x, float %y) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX8-NEXT: v_add_f32_e32 v0, v0, v1
; GFX8-NEXT: v_exp_f32_e32 v0, v0
-; GFX8-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX8-NEXT: v_not_b32_e32 v1, 63
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX8-NEXT: v_ldexp_f32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fneg_lhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x4f800000
; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
-; GFX9-NEXT: v_mul_f32_e64 v0, -v0, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX9-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX9-NEXT: v_ldexp_f32 v0, -v0, v2
; GFX9-NEXT: v_log_f32_e32 v0, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000
; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
@@ -1643,17 +1673,18 @@ define float @v_pow_f32_fneg_lhs(float %x, float %y) {
; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX9-NEXT: v_add_f32_e32 v0, v0, v1
; GFX9-NEXT: v_exp_f32_e32 v0, v0
-; GFX9-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: v_not_b32_e32 v1, 63
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX9-NEXT: v_ldexp_f32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32_fneg_lhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, -v0
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s4
-; GFX10-NEXT: v_mul_f32_e64 v0, -v0, v2
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
+; GFX10-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX10-NEXT: v_ldexp_f32 v0, -v0, v2
; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s4
; GFX10-NEXT: v_log_f32_e32 v0, v0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
@@ -1661,9 +1692,9 @@ define float @v_pow_f32_fneg_lhs(float %x, float %y) {
; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
; GFX10-NEXT: v_add_f32_e32 v0, v0, v1
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo
; GFX10-NEXT: v_exp_f32_e32 v0, v0
-; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX10-NEXT: v_ldexp_f32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_pow_f32_fneg_lhs:
@@ -1671,23 +1702,24 @@ define float @v_pow_f32_fneg_lhs(float %x, float %y) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, -v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0
-; GFX11-NEXT: v_mul_f32_e64 v0, -v0, v2
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_ldexp_f32 v0, -v0, v2
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_log_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v0, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo
; GFX11-NEXT: v_exp_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX11-NEXT: v_ldexp_f32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.x = fneg float %x
%pow = call float @llvm.pow.f32(float %neg.x, float %y)
@@ -1699,10 +1731,10 @@ define float @v_pow_f32_fneg_rhs(float %x, float %y) {
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX6-NEXT: v_mov_b32_e32 v3, 0x4f800000
; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX6-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
-; GFX6-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX6-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v2
; GFX6-NEXT: v_log_f32_e32 v0, v0
; GFX6-NEXT: v_mov_b32_e32 v2, 0x42000000
; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
@@ -1714,19 +1746,19 @@ define float @v_pow_f32_fneg_rhs(float %x, float %y) {
; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX6-NEXT: v_add_f32_e32 v0, v0, v1
; GFX6-NEXT: v_exp_f32_e32 v0, v0
-; GFX6-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX6-NEXT: v_not_b32_e32 v1, 63
+; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fneg_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX8-NEXT: v_mov_b32_e32 v3, 0x4f800000
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX8-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
-; GFX8-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX8-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX8-NEXT: v_ldexp_f32 v0, v0, v2
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000
; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
@@ -1738,19 +1770,19 @@ define float @v_pow_f32_fneg_rhs(float %x, float %y) {
; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX8-NEXT: v_add_f32_e32 v0, v0, v1
; GFX8-NEXT: v_exp_f32_e32 v0, v0
-; GFX8-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX8-NEXT: v_not_b32_e32 v1, 63
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX8-NEXT: v_ldexp_f32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fneg_rhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x4f800000
; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX9-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX9-NEXT: v_ldexp_f32 v0, v0, v2
; GFX9-NEXT: v_log_f32_e32 v0, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000
; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
@@ -1762,17 +1794,18 @@ define float @v_pow_f32_fneg_rhs(float %x, float %y) {
; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX9-NEXT: v_add_f32_e32 v0, v0, v1
; GFX9-NEXT: v_exp_f32_e32 v0, v0
-; GFX9-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: v_not_b32_e32 v1, 63
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX9-NEXT: v_ldexp_f32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32_fneg_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX10-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX10-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX10-NEXT: v_ldexp_f32 v0, v0, v2
; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo
; GFX10-NEXT: v_log_f32_e32 v0, v0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
@@ -1780,32 +1813,34 @@ define float @v_pow_f32_fneg_rhs(float %x, float %y) {
; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
; GFX10-NEXT: v_add_f32_e32 v0, v0, v1
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo
; GFX10-NEXT: v_exp_f32_e32 v0, v0
-; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX10-NEXT: v_ldexp_f32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_pow_f32_fneg_rhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX11-NEXT: v_ldexp_f32 v0, v0, v2
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_log_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_dx9_zero_f32_e64 v0, v0, -v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_exp_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX11-NEXT: v_ldexp_f32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%neg.y = fneg float %y
%pow = call float @llvm.pow.f32(float %x, float %neg.y)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll
index eeb7b138fde31a..fe002d69faf667 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll
@@ -18,9 +18,9 @@ define i16 @v_powi_f16(i16 %l, i32 %r) {
; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
; GFX7-NEXT: v_add_f32_e32 v0, v0, v1
; GFX7-NEXT: v_exp_f32_e32 v0, v0
-; GFX7-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GFX7-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-NEXT: v_not_b32_e32 v1, 63
+; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
@@ -75,53 +75,80 @@ define i16 @v_powi_f16(i16 %l, i32 %r) {
}
define float @v_powi_f32(float %l, i32 %r) {
-; GFX78-LABEL: v_powi_f32:
-; GFX78: ; %bb.0:
-; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX78-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX78-NEXT: v_mov_b32_e32 v3, 0x4f800000
-; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX78-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
-; GFX78-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX78-NEXT: v_log_f32_e32 v0, v0
-; GFX78-NEXT: v_cvt_f32_i32_e32 v1, v1
-; GFX78-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX78-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
-; GFX78-NEXT: v_sub_f32_e32 v0, v0, v2
-; GFX78-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
-; GFX78-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; GFX78-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; GFX78-NEXT: v_add_f32_e32 v0, v0, v1
-; GFX78-NEXT: v_exp_f32_e32 v0, v0
-; GFX78-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GFX78-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX78-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX78-NEXT: s_setpc_b64 s[30:31]
+; GFX7-LABEL: v_powi_f32:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mov_b32_e32 v2, 0x800000
+; GFX7-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX7-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_log_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_i32_e32 v1, v1
+; GFX7-NEXT: v_mov_b32_e32 v2, 0x42000000
+; GFX7-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX7-NEXT: v_sub_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX7-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX7-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX7-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX7-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX7-NEXT: v_exp_f32_e32 v0, v0
+; GFX7-NEXT: v_not_b32_e32 v1, 63
+; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_powi_f32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_mov_b32_e32 v2, 0x800000
+; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX8-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX8-NEXT: v_ldexp_f32 v0, v0, v2
+; GFX8-NEXT: v_log_f32_e32 v0, v0
+; GFX8-NEXT: v_cvt_f32_i32_e32 v1, v1
+; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000
+; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX8-NEXT: v_sub_f32_e32 v0, v0, v2
+; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX8-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX8-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX8-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX8-NEXT: v_exp_f32_e32 v0, v0
+; GFX8-NEXT: v_not_b32_e32 v1, 63
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX8-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX11-NEXT: v_cvt_f32_i32_e32 v1, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX11-NEXT: v_ldexp_f32 v0, v0, v2
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_log_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v0, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_exp_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX11-NEXT: v_ldexp_f32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%res = call float @llvm.powi.f32.i32(float %l, i32 %r)
ret float %res
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll
index 5b72795ba07eaa..b128be2186df29 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll
@@ -1,8 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s
-;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
-;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX1030 %s
-;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX1100 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s
define float @fmul_select_f32_test1(float %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX7-LABEL: fmul_select_f32_test1:
@@ -21,22 +25,22 @@ define float @fmul_select_f32_test1(float %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f32_test1:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
-; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f32_test1:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1100-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: fmul_select_f32_test1:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
+; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fmul_select_f32_test1:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, float 2.000000e+00, float 1.000000e+00
%ldexp = fmul float %x, %y
@@ -60,22 +64,22 @@ define float @fmul_select_f32_test2(float %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f32_test2:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc_lo
-; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f32_test2:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1100-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: fmul_select_f32_test2:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc_lo
+; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fmul_select_f32_test2:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, float 5.000000e-01, float 1.000000e+00
%ldexp = fmul float %x, %y
@@ -83,49 +87,71 @@ define float @fmul_select_f32_test2(float %x, i32 %bool.arg1, i32 %bool.arg2) {
}
define <2 x float> @fmul_select_v2f32_test3(<2 x float> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
-; GFX7-LABEL: fmul_select_v2f32_test3:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX7-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
-; GFX7-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc
-; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX7-NEXT: v_mul_f32_e32 v1, v1, v3
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: fmul_select_v2f32_test3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX9-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX9-NEXT: v_mul_f32_e32 v1, v1, v3
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_v2f32_test3:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
-; GFX1030-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc_lo
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5
-; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX1030-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc_lo
-; GFX1030-NEXT: v_mul_f32_e32 v1, v1, v3
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_v2f32_test3:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
-; GFX1100-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc_lo
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5
-; GFX1100-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_v2f32_test3:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_v2f32_test3:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_v2f32_test3:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc
+; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX9-SDAG-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_v2f32_test3:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc
+; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX9-GISEL-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: fmul_select_v2f32_test3:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5
+; GFX10-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX10-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc_lo
+; GFX10-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fmul_select_v2f32_test3:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5
+; GFX11-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
%y = select <2 x i1> %bool, <2 x float> <float 2.000000e+00, float 2.000000e+00>, <2 x float> <float 1.000000e+00, float 1.000000e+00>
%ldexp = fmul <2 x float> %x, %y
@@ -133,49 +159,71 @@ define <2 x float> @fmul_select_v2f32_test3(<2 x float> %x, <2 x i32> %bool.arg1
}
define <2 x float> @fmul_select_v2f32_test4(<2 x float> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
-; GFX7-LABEL: fmul_select_v2f32_test4:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX7-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
-; GFX7-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc
-; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX7-NEXT: v_mul_f32_e32 v1, v1, v3
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: fmul_select_v2f32_test4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX9-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX9-NEXT: v_mul_f32_e32 v1, v1, v3
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_v2f32_test4:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
-; GFX1030-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc_lo
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5
-; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX1030-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc_lo
-; GFX1030-NEXT: v_mul_f32_e32 v1, v1, v3
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_v2f32_test4:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
-; GFX1100-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc_lo
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5
-; GFX1100-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_v2f32_test4:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_v2f32_test4:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_v2f32_test4:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc
+; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX9-SDAG-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_v2f32_test4:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc
+; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX9-GISEL-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: fmul_select_v2f32_test4:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5
+; GFX10-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX10-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc_lo
+; GFX10-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fmul_select_v2f32_test4:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5
+; GFX11-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
%y = select <2 x i1> %bool, <2 x float> <float 5.000000e-01, float 5.000000e-01>, <2 x float> <float 1.000000e+00, float 1.000000e+00>
%ldexp = fmul <2 x float> %x, %y
@@ -199,22 +247,22 @@ define float @fmul_select_f32_test5(float %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f32_test5:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_cndmask_b32_e64 v1, -1.0, -2.0, vcc_lo
-; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f32_test5:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1100-NEXT: v_cndmask_b32_e64 v1, -1.0, -2.0, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: fmul_select_f32_test5:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-NEXT: v_cndmask_b32_e64 v1, -1.0, -2.0, vcc_lo
+; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fmul_select_f32_test5:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-NEXT: v_cndmask_b32_e64 v1, -1.0, -2.0, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, float -2.000000e+00, float -1.000000e+00
%ldexp = fmul float %x, %y
@@ -222,44 +270,83 @@ define float @fmul_select_f32_test5(float %x, i32 %bool.arg1, i32 %bool.arg2) {
}
define float @fmul_select_f32_test6(float %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX7-LABEL: fmul_select_f32_test6:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mov_b32_e32 v3, 0x41000000
-; GFX7-NEXT: v_mov_b32_e32 v4, 0xc0400000
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: fmul_select_f32_test6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x41000000
-; GFX9-NEXT: v_mov_b32_e32 v4, 0xc0400000
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_f32_test6:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_mov_b32_e32 v3, 0xc0400000
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x41000000, v3, vcc_lo
-; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f32_test6:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_mov_b32_e32 v3, 0xc0400000
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x41000000, v3, vcc_lo
-; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_f32_test6:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mov_b32_e32 v3, 0x41000000
+; GFX7-SDAG-NEXT: v_mov_b32_e32 v4, 0xc0400000
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_f32_test6:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v3, 0xc0400000
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0x41000000
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_f32_test6:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x41000000
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0xc0400000
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_f32_test6:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xc0400000
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x41000000
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
+; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: fmul_select_f32_test6:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0xc0400000
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x41000000, v3, vcc_lo
+; GFX10-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: fmul_select_f32_test6:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x41000000
+; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xc0400000, vcc_lo
+; GFX10-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: fmul_select_f32_test6:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 0xc0400000
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x41000000, v3, vcc_lo
+; GFX11-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: fmul_select_f32_test6:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x41000000
+; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xc0400000, vcc_lo
+; GFX11-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, float -3.000000e+00, float 8.000000e+00
%ldexp = fmul float %x, %y
@@ -285,22 +372,22 @@ define float @fmul_select_f32_test7_sel_log2val_pos59_pos92(float %x, i32 %bool.
; GFX9-NEXT: v_ldexp_f32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f32_test7_sel_log2val_pos59_pos92:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0x5c, 59, vcc_lo
-; GFX1030-NEXT: v_ldexp_f32 v0, v0, v1
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f32_test7_sel_log2val_pos59_pos92:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0x5c, 59, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_ldexp_f32 v0, v0, v1
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: fmul_select_f32_test7_sel_log2val_pos59_pos92:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 0x5c, 59, vcc_lo
+; GFX10-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fmul_select_f32_test7_sel_log2val_pos59_pos92:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0x5c, 59, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, float 0x43A0000000000000, float 0x45B0000000000000
%ldexp = fmul float %x, %y
@@ -308,44 +395,83 @@ define float @fmul_select_f32_test7_sel_log2val_pos59_pos92(float %x, i32 %bool.
}
define float @fmul_select_f32_test8(float %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX7-LABEL: fmul_select_f32_test8:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mov_b32_e32 v3, 0xc1000000
-; GFX7-NEXT: v_mov_b32_e32 v4, 0x41800000
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: fmul_select_f32_test8:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, 0xc1000000
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x41800000
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_f32_test8:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_mov_b32_e32 v3, 0x41800000
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xc1000000, v3, vcc_lo
-; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f32_test8:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_mov_b32_e32 v3, 0x41800000
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xc1000000, v3, vcc_lo
-; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_f32_test8:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mov_b32_e32 v3, 0xc1000000
+; GFX7-SDAG-NEXT: v_mov_b32_e32 v4, 0x41800000
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_f32_test8:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v3, 0x41800000
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0xc1000000
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_f32_test8:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xc1000000
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x41800000
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_f32_test8:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x41800000
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xc1000000
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
+; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: fmul_select_f32_test8:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0x41800000
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0xc1000000, v3, vcc_lo
+; GFX10-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: fmul_select_f32_test8:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1000000
+; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x41800000, vcc_lo
+; GFX10-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: fmul_select_f32_test8:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 0x41800000
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0xc1000000, v3, vcc_lo
+; GFX11-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: fmul_select_f32_test8:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1000000
+; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x41800000, vcc_lo
+; GFX11-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, float 1.600000e+01, float -8.000000e+00
%ldexp = fmul float %x, %y
@@ -369,22 +495,22 @@ define float @fmul_select_f32_test9(float %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f32_test9:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_cndmask_b32_e64 v1, 2.0, 0, vcc_lo
-; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f32_test9:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1100-NEXT: v_cndmask_b32_e64 v1, 2.0, 0, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: fmul_select_f32_test9:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 2.0, 0, vcc_lo
+; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fmul_select_f32_test9:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 2.0, 0, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, float 0.000000e+00, float 2.000000e+00
%ldexp = fmul float %x, %y
@@ -410,22 +536,22 @@ define float @fmul_select_f32_test10(float %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f32_test10:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0, 0x80000000, vcc_lo
-; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f32_test10:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 0x80000000, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: fmul_select_f32_test10:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x80000000, vcc_lo
+; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fmul_select_f32_test10:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x80000000, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, float -0.000000e+00, float 0.000000e+00
%ldexp = fmul float %x, %y
@@ -451,22 +577,22 @@ define float @fmul_select_f32_test11_sel_log2val_pos78_pos56(float %x, i32 %bool
; GFX9-NEXT: v_ldexp_f32 v0, -v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f32_test11_sel_log2val_pos78_pos56:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_cndmask_b32_e64 v1, 56, 0x4e, vcc_lo
-; GFX1030-NEXT: v_ldexp_f32 v0, -v0, v1
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f32_test11_sel_log2val_pos78_pos56:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1100-NEXT: v_cndmask_b32_e64 v1, 56, 0x4e, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_ldexp_f32 v0, -v0, v1
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: fmul_select_f32_test11_sel_log2val_pos78_pos56:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 56, 0x4e, vcc_lo
+; GFX10-NEXT: v_ldexp_f32 v0, -v0, v1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fmul_select_f32_test11_sel_log2val_pos78_pos56:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 56, 0x4e, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_ldexp_f32 v0, -v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, float 0xC4D0000000000000, float 0xC370000000000000
%ldexp = fmul float %x, %y
@@ -474,44 +600,83 @@ define float @fmul_select_f32_test11_sel_log2val_pos78_pos56(float %x, i32 %bool
}
define float @fmul_select_f32_test12_sel_log2val_neg48_pos68(float %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX7-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mov_b32_e32 v3, 0x44
-; GFX7-NEXT: v_not_b32_e32 v4, 47
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x44
-; GFX9-NEXT: v_not_b32_e32 v4, 47
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT: v_ldexp_f32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_not_b32_e32 v3, 47
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x44, v3, vcc_lo
-; GFX1030-NEXT: v_ldexp_f32 v0, v0, v1
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_not_b32_e32 v3, 47
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x44, v3, vcc_lo
-; GFX1100-NEXT: v_ldexp_f32 v0, v0, v1
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mov_b32_e32 v3, 0x44
+; GFX7-SDAG-NEXT: v_not_b32_e32 v4, 47
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX7-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_not_b32_e32 v3, 47
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0x44
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
+; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x44
+; GFX9-SDAG-NEXT: v_not_b32_e32 v4, 47
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_not_b32_e32 v3, 47
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x44
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_not_b32_e32 v3, 47
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x44, v3, vcc_lo
+; GFX10-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x44
+; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xffffffd0, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_not_b32_e32 v3, 47
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x44, v3, vcc_lo
+; GFX11-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x44
+; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xffffffd0, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, float 0x3CF0000000000000, float 0x4430000000000000
%ldexp = fmul float %x, %y
@@ -535,22 +700,22 @@ define double @fmul_select_f64_test1(double %x, i32 %bool.arg1, i32 %bool.arg2)
; GFX9-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f64_test1:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
-; GFX1030-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
-; GFX1030-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f64_test1:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
-; GFX1100-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: fmul_select_f64_test1:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX10-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fmul_select_f64_test1:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, double 2.000000e+00, double 1.000000e+00
%ldexp = fmul double %x, %y
@@ -574,22 +739,22 @@ define double @fmul_select_f64_test2(double %x, i32 %bool.arg1, i32 %bool.arg2)
; GFX9-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f64_test2:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
-; GFX1030-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo
-; GFX1030-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f64_test2:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
-; GFX1100-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: fmul_select_f64_test2:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo
+; GFX10-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fmul_select_f64_test2:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, double 5.000000e-01, double 1.000000e+00
%ldexp = fmul double %x, %y
@@ -619,28 +784,28 @@ define <2 x double> @fmul_select_v2f64_test3(<2 x double> %x, <2 x i32> %bool.ar
; GFX9-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_v2f64_test3:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
-; GFX1030-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
-; GFX1030-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
-; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo
-; GFX1030-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_v2f64_test3:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
-; GFX1100-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
-; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo
-; GFX1100-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: fmul_select_v2f64_test3:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
+; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
+; GFX10-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
+; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo
+; GFX10-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fmul_select_v2f64_test3:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
+; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
+; GFX11-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo
+; GFX11-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
%y = select <2 x i1> %bool, <2 x double> <double 2.000000e+00, double 2.000000e+00>, <2 x double> <double 1.000000e+00, double 1.000000e+00>
%ldexp = fmul <2 x double> %x, %y
@@ -670,28 +835,28 @@ define <2 x double> @fmul_select_v2f64_test4(<2 x double> %x, <2 x i32> %bool.ar
; GFX9-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_v2f64_test4:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
-; GFX1030-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc_lo
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
-; GFX1030-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
-; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc_lo
-; GFX1030-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_v2f64_test4:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
-; GFX1100-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc_lo
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
-; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc_lo
-; GFX1100-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: fmul_select_v2f64_test4:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
+; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
+; GFX10-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
+; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc_lo
+; GFX10-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fmul_select_v2f64_test4:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
+; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
+; GFX11-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc_lo
+; GFX11-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
%y = select <2 x i1> %bool, <2 x double> <double 5.000000e-01, double 5.000000e-01>, <2 x double> <double 1.000000e+00, double 1.000000e+00>
%ldexp = fmul <2 x double> %x, %y
@@ -715,22 +880,22 @@ define double @fmul_select_f64_test5(double %x, i32 %bool.arg1, i32 %bool.arg2)
; GFX9-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f64_test5:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
-; GFX1030-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo
-; GFX1030-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f64_test5:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
-; GFX1100-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: fmul_select_f64_test5:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo
+; GFX10-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fmul_select_f64_test5:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, double -5.000000e-01, double -1.000000e+00
%ldexp = fmul double %x, %y
@@ -754,22 +919,22 @@ define double @fmul_select_f64_test6(double %x, i32 %bool.arg1, i32 %bool.arg2)
; GFX9-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f64_test6:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
-; GFX1030-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
-; GFX1030-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f64_test6:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
-; GFX1100-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: fmul_select_f64_test6:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX10-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fmul_select_f64_test6:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, double -2.000000e+00, double -1.000000e+00
%ldexp = fmul double %x, %y
@@ -777,44 +942,64 @@ define double @fmul_select_f64_test6(double %x, i32 %bool.arg1, i32 %bool.arg2)
}
define double @fmul_select_f64_test7(double %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX7-LABEL: fmul_select_f64_test7:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mov_b32_e32 v4, 0xbff00000
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v4, 2.0, vcc
-; GFX7-NEXT: v_mov_b32_e32 v2, 0
-; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: fmul_select_f64_test7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v4, 0xbff00000
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v4, 2.0, vcc
-; GFX9-NEXT: v_mov_b32_e32 v2, 0
-; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_f64_test7:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
-; GFX1030-NEXT: v_mov_b32_e32 v4, 0
-; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0xbff00000, 2.0, vcc_lo
-; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f64_test7:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
-; GFX1100-NEXT: v_mov_b32_e32 v4, 0
-; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0xbff00000, 2.0, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_f64_test7:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mov_b32_e32 v4, 0xbff00000
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v3, v4, 2.0, vcc
+; GFX7-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX7-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_f64_test7:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v5, 0xbff00000
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v5, v5, 2.0, vcc
+; GFX7-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_f64_test7:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0xbff00000
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v3, v4, 2.0, vcc
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_f64_test7:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0xbff00000
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v5, v5, 2.0, vcc
+; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: fmul_select_f64_test7:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX10-NEXT: v_mov_b32_e32 v4, 0
+; GFX10-NEXT: v_cndmask_b32_e64 v5, 0xbff00000, 2.0, vcc_lo
+; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fmul_select_f64_test7:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX11-NEXT: v_mov_b32_e32 v4, 0
+; GFX11-NEXT: v_cndmask_b32_e64 v5, 0xbff00000, 2.0, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, double 2.000000e+00, double -1.000000e+00
%ldexp = fmul double %x, %y
@@ -838,22 +1023,22 @@ define double @fmul_select_f64_test8(double %x, i32 %bool.arg1, i32 %bool.arg2)
; GFX9-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f64_test8:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
-; GFX1030-NEXT: v_cndmask_b32_e64 v2, 5, 2, vcc_lo
-; GFX1030-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f64_test8:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
-; GFX1100-NEXT: v_cndmask_b32_e64 v2, 5, 2, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: fmul_select_f64_test8:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 5, 2, vcc_lo
+; GFX10-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fmul_select_f64_test8:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 5, 2, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, double -4.000000e+00, double -3.200000e+01
%ldexp = fmul double %x, %y
@@ -883,28 +1068,28 @@ define <2 x double> @fmul_select_v2f64_test9(<2 x double> %x, <2 x i32> %bool.ar
; GFX9-NEXT: v_ldexp_f64 v[2:3], -v[2:3], v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_v2f64_test9:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
-; GFX1030-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
-; GFX1030-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v4
-; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo
-; GFX1030-NEXT: v_ldexp_f64 v[2:3], -v[2:3], v5
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_v2f64_test9:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
-; GFX1100-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v4
-; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo
-; GFX1100-NEXT: v_ldexp_f64 v[2:3], -v[2:3], v5
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: fmul_select_v2f64_test9:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
+; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
+; GFX10-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v4
+; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo
+; GFX10-NEXT: v_ldexp_f64 v[2:3], -v[2:3], v5
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fmul_select_v2f64_test9:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
+; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v4
+; GFX11-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo
+; GFX11-NEXT: v_ldexp_f64 v[2:3], -v[2:3], v5
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
%y = select <2 x i1> %bool, <2 x double> <double -2.000000e+00, double -2.000000e+00>, <2 x double> <double -1.000000e+00, double -1.000000e+00>
%ldexp = fmul <2 x double> %x, %y
@@ -912,60 +1097,115 @@ define <2 x double> @fmul_select_v2f64_test9(<2 x double> %x, <2 x i32> %bool.ar
}
define <2 x double> @fmul_select_v2f64_test10(<2 x double> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
-; GFX7-LABEL: fmul_select_v2f64_test10:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mov_b32_e32 v8, 0xbff00000
-; GFX7-NEXT: v_mov_b32_e32 v9, 0x3fe00000
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6
-; GFX7-NEXT: v_cndmask_b32_e32 v9, v8, v9, vcc
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
-; GFX7-NEXT: v_mov_b32_e32 v8, 0
-; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
-; GFX7-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: fmul_select_v2f64_test10:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v8, 0xbff00000
-; GFX9-NEXT: v_mov_b32_e32 v9, 0x3fe00000
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6
-; GFX9-NEXT: v_cndmask_b32_e32 v9, v8, v9, vcc
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
-; GFX9-NEXT: v_mov_b32_e32 v8, 0
-; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
-; GFX9-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_v2f64_test10:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_mov_b32_e32 v8, 0x3fe00000
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
-; GFX1030-NEXT: v_cndmask_b32_e32 v9, 0xbff00000, v8, vcc_lo
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
-; GFX1030-NEXT: v_mov_b32_e32 v8, 0
-; GFX1030-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
-; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
-; GFX1030-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_v2f64_test10:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_mov_b32_e32 v8, 0x3fe00000
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-NEXT: v_dual_cndmask_b32 v9, 0xbff00000, v8 :: v_dual_mov_b32 v8, 0
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
-; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
-; GFX1100-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_v2f64_test10:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mov_b32_e32 v8, 0xbff00000
+; GFX7-SDAG-NEXT: v_mov_b32_e32 v9, 0x3fe00000
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6
+; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v9, v8, v9, vcc
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX7-SDAG-NEXT: v_mov_b32_e32 v8, 0
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; GFX7-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
+; GFX7-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_v2f64_test10:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v9, 0x3fe00000
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6
+; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v8, 0
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; GFX7-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
+; GFX7-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_v2f64_test10:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v8, 0xbff00000
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v9, 0x3fe00000
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v9, v8, v9, vcc
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v8, 0
+; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
+; GFX9-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_v2f64_test10:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v9, 0x3fe00000
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6
+; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v8, 0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
+; GFX9-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: fmul_select_v2f64_test10:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_mov_b32_e32 v8, 0x3fe00000
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v9, 0xbff00000, v8, vcc_lo
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
+; GFX10-SDAG-NEXT: v_mov_b32_e32 v8, 0
+; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
+; GFX10-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
+; GFX10-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: fmul_select_v2f64_test10:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v9, 0xbff00000
+; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v8, 0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v9, v9, 0x3fe00000, vcc_lo
+; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
+; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: fmul_select_v2f64_test10:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v8, 0x3fe00000
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_dual_cndmask_b32 v9, 0xbff00000, v8 :: v_dual_mov_b32 v8, 0
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
+; GFX11-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
+; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: fmul_select_v2f64_test10:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_dual_mov_b32 v9, 0xbff00000 :: v_dual_mov_b32 v8, 0
+; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v9, v9, 0x3fe00000, vcc_lo
+; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
+; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
%y = select <2 x i1> %bool, <2 x double> <double 5.000000e-01, double 2.000000e+00>, <2 x double> <double -1.000000e+00, double 1.000000e+00>
%ldexp = fmul <2 x double> %x, %y
@@ -973,44 +1213,64 @@ define <2 x double> @fmul_select_v2f64_test10(<2 x double> %x, <2 x i32> %bool.a
}
define double @fmul_select_f64_test11(double %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX7-LABEL: fmul_select_f64_test11:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_bfrev_b32_e32 v4, 1
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v4, -2.0, vcc
-; GFX7-NEXT: v_mov_b32_e32 v2, 0
-; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: fmul_select_f64_test11:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_bfrev_b32_e32 v4, 1
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX9-NEXT: v_cndmask_b32_e64 v3, v4, -2.0, vcc
-; GFX9-NEXT: v_mov_b32_e32 v2, 0
-; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_f64_test11:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
-; GFX1030-NEXT: v_mov_b32_e32 v4, 0
-; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0x80000000, -2.0, vcc_lo
-; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f64_test11:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
-; GFX1100-NEXT: v_mov_b32_e32 v4, 0
-; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0x80000000, -2.0, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_f64_test11:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_bfrev_b32_e32 v4, 1
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v3, v4, -2.0, vcc
+; GFX7-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX7-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_f64_test11:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_bfrev_b32_e32 v5, 1
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v5, v5, -2.0, vcc
+; GFX7-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_f64_test11:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v4, 1
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v3, v4, -2.0, vcc
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_f64_test11:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v5, 1
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v5, v5, -2.0, vcc
+; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: fmul_select_f64_test11:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX10-NEXT: v_mov_b32_e32 v4, 0
+; GFX10-NEXT: v_cndmask_b32_e64 v5, 0x80000000, -2.0, vcc_lo
+; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fmul_select_f64_test11:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX11-NEXT: v_mov_b32_e32 v4, 0
+; GFX11-NEXT: v_cndmask_b32_e64 v5, 0x80000000, -2.0, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, double -2.000000e+00, double -0.000000e+00
%ldexp = fmul double %x, %y
@@ -1018,45 +1278,84 @@ define double @fmul_select_f64_test11(double %x, i32 %bool.arg1, i32 %bool.arg2)
}
define double @fmul_select_f64_test12(double %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX7-LABEL: fmul_select_f64_test12:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, v2, v3
-; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX7-NEXT: v_lshlrev_b32_e32 v3, 31, v2
-; GFX7-NEXT: v_mov_b32_e32 v2, 0
-; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: fmul_select_f64_test12:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, v2, v3
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT: v_lshlrev_b32_e32 v3, 31, v2
-; GFX9-NEXT: v_mov_b32_e32 v2, 0
-; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_f64_test12:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_cmp_ne_u32_e32 vcc_lo, v2, v3
-; GFX1030-NEXT: v_mov_b32_e32 v2, 0
-; GFX1030-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo
-; GFX1030-NEXT: v_lshlrev_b32_e32 v3, 31, v3
-; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f64_test12:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_cmp_ne_u32_e32 vcc_lo, v2, v3
-; GFX1100-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 31, v3
-; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_f64_test12:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, v2, v3
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v3, 31, v2
+; GFX7-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX7-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_f64_test12:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_bfrev_b32_e32 v5, 1
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
+; GFX7-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_f64_test12:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, v2, v3
+; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v3, 31, v2
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_f64_test12:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v5, 1
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
+; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: fmul_select_f64_test12:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_cmp_ne_u32_e32 vcc_lo, v2, v3
+; GFX10-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v3, 31, v3
+; GFX10-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: fmul_select_f64_test12:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v5, 0x80000000, 0, vcc_lo
+; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: fmul_select_f64_test12:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_cmp_ne_u32_e32 vcc_lo, v2, v3
+; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 31, v3
+; GFX11-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: fmul_select_f64_test12:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v5, 0x80000000, 0, vcc_lo
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, double 0.000000e+00, double -0.000000e+00
%ldexp = fmul double %x, %y
@@ -1084,24 +1383,24 @@ define double @fmul_select_f64_test13(double %x, i32 %bool.arg1, i32 %bool.arg2)
; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f64_test13:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
-; GFX1030-NEXT: v_mov_b32_e32 v4, 0
-; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0x40300000, 0, vcc_lo
-; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f64_test13:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
-; GFX1100-NEXT: v_mov_b32_e32 v4, 0
-; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0x40300000, 0, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: fmul_select_f64_test13:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX10-NEXT: v_mov_b32_e32 v4, 0
+; GFX10-NEXT: v_cndmask_b32_e64 v5, 0x40300000, 0, vcc_lo
+; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fmul_select_f64_test13:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX11-NEXT: v_mov_b32_e32 v4, 0
+; GFX11-NEXT: v_cndmask_b32_e64 v5, 0x40300000, 0, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5]
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, double 0.000000e+00, double 1.600000e+01
%ldexp = fmul double %x, %y
@@ -1109,44 +1408,83 @@ define double @fmul_select_f64_test13(double %x, i32 %bool.arg1, i32 %bool.arg2)
}
define double @fmul_select_f64_test14_sel_log2val_pos92_neg27(double %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX7-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_not_b32_e32 v4, 26
-; GFX7-NEXT: v_mov_b32_e32 v5, 0x5c
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc
-; GFX7-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_not_b32_e32 v4, 26
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x5c
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc
-; GFX9-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_mov_b32_e32 v4, 0x5c
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
-; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0xffffffe5, v4, vcc_lo
-; GFX1030-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_mov_b32_e32 v4, 0x5c
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0xffffffe5, v4, vcc_lo
-; GFX1100-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_not_b32_e32 v4, 26
+; GFX7-SDAG-NEXT: v_mov_b32_e32 v5, 0x5c
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc
+; GFX7-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0x5c
+; GFX7-GISEL-NEXT: v_not_b32_e32 v5, 26
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v4, vcc
+; GFX7-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_not_b32_e32 v4, 26
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0x5c
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc
+; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x5c
+; GFX9-GISEL-NEXT: v_not_b32_e32 v5, 26
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v4, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_mov_b32_e32 v4, 0x5c
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v2, 0xffffffe5, v4, vcc_lo
+; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_not_b32_e32 v4, 26
+; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v2, v4, 0x5c, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v4, 0x5c
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v2, 0xffffffe5, v4, vcc_lo
+; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_not_b32_e32 v4, 26
+; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v2, v4, 0x5c, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, double 0x45B0000000000000, double 0x3E40000000000000
%ldexp = fmul double %x, %y
@@ -1154,44 +1492,83 @@ define double @fmul_select_f64_test14_sel_log2val_pos92_neg27(double %x, i32 %bo
}
define double @fmul_select_f64_test15_sel_log2val_neg42_neg33(double %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX7-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_not_b32_e32 v4, 32
-; GFX7-NEXT: v_not_b32_e32 v5, 41
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX7-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc
-; GFX7-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_not_b32_e32 v4, 32
-; GFX9-NEXT: v_not_b32_e32 v5, 41
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc
-; GFX9-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_not_b32_e32 v4, 41
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
-; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0xffffffdf, v4, vcc_lo
-; GFX1030-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_not_b32_e32 v4, 41
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0xffffffdf, v4, vcc_lo
-; GFX1100-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_not_b32_e32 v4, 32
+; GFX7-SDAG-NEXT: v_not_b32_e32 v5, 41
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc
+; GFX7-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_not_b32_e32 v4, 41
+; GFX7-GISEL-NEXT: v_not_b32_e32 v5, 32
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v4, vcc
+; GFX7-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_not_b32_e32 v4, 32
+; GFX9-SDAG-NEXT: v_not_b32_e32 v5, 41
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc
+; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_not_b32_e32 v4, 41
+; GFX9-GISEL-NEXT: v_not_b32_e32 v5, 32
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
+; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v4, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_not_b32_e32 v4, 41
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v2, 0xffffffdf, v4, vcc_lo
+; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_not_b32_e32 v4, 32
+; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v2, v4, 0xffffffd6, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_not_b32_e32 v4, 41
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v2, 0xffffffdf, v4, vcc_lo
+; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_not_b32_e32 v4, 32
+; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v2, v4, 0xffffffd6, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, double 0x3D50000000000000, double 0x3DE0000000000000
%ldexp = fmul double %x, %y
@@ -1200,40 +1577,82 @@ define double @fmul_select_f64_test15_sel_log2val_neg42_neg33(double %x, i32 %bo
define half @fmul_select_f16_test1(half %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX7-LABEL: fmul_select_f16_test1:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX7-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: fmul_select_f16_test1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX9-NEXT: v_ldexp_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_f16_test1:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
-; GFX1030-NEXT: v_ldexp_f16_e32 v0, v0, v1
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f16_test1:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_ldexp_f16_e32 v0, v0, v1
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_f16_test1:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_f16_test1:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_f16_test1:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_f16_test1:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff8000
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
+; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v2, v3
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: fmul_select_f16_test1:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX10-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: fmul_select_f16_test1:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: fmul_select_f16_test1:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: fmul_select_f16_test1:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, half 2.000000e+00, half 1.000000e+00
%ldexp = fmul half %x, %y
@@ -1241,47 +1660,89 @@ define half @fmul_select_f16_test1(half %x, i32 %bool.arg1, i32 %bool.arg2) {
}
define half @fmul_select_f16_test2(half %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX7-LABEL: fmul_select_f16_test2:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX7-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: fmul_select_f16_test2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
-; GFX9-NEXT: s_movk_i32 s4, 0x8000
-; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fff
-; GFX9-NEXT: v_med3_i32 v1, v1, s4, v2
-; GFX9-NEXT: v_ldexp_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_f16_test2:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: s_movk_i32 s4, 0x8000
-; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
-; GFX1030-NEXT: v_med3_i32 v1, v1, s4, 0x7fff
-; GFX1030-NEXT: v_ldexp_f16_e32 v0, v0, v1
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f16_test2:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1100-NEXT: s_movk_i32 s0, 0x8000
-; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_med3_i32 v1, v1, s0, 0x7fff
-; GFX1100-NEXT: v_ldexp_f16_e32 v0, v0, v1
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_f16_test2:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_f16_test2:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
+; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_f16_test2:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
+; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x8000
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0x7fff
+; GFX9-SDAG-NEXT: v_med3_i32 v1, v1, s4, v2
+; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_f16_test2:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff8000
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
+; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v2, v3
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: fmul_select_f16_test2:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-SDAG-NEXT: s_movk_i32 s4, 0x8000
+; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX10-SDAG-NEXT: v_med3_i32 v1, v1, s4, 0x7fff
+; GFX10-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: fmul_select_f16_test2:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: fmul_select_f16_test2:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000
+; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_med3_i32 v1, v1, s0, 0x7fff
+; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: fmul_select_f16_test2:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, half 5.000000e-01, half 1.000000e+00
%ldexp = fmul half %x, %y
@@ -1289,59 +1750,126 @@ define half @fmul_select_f16_test2(half %x, i32 %bool.arg1, i32 %bool.arg2) {
}
define <2 x half> @fmul_select_v2f16_test3(<2 x half> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
-; GFX7-LABEL: fmul_select_v2f16_test3:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX7-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
-; GFX7-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc
-; GFX7-NEXT: v_mul_f32_e32 v1, v1, v3
-; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: fmul_select_v2f16_test3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x3c00
-; GFX9-NEXT: v_mov_b32_e32 v6, 0x4000
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
-; GFX9-NEXT: v_pack_b32_f16 v1, v1, v2
-; GFX9-NEXT: v_pk_mul_f16 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_v2f16_test3:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_mov_b32_e32 v5, 0x4000
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
-; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
-; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo
-; GFX1030-NEXT: v_pack_b32_f16 v1, v1, v2
-; GFX1030-NEXT: v_pk_mul_f16 v0, v0, v1
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_v2f16_test3:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_mov_b32_e32 v5, 0x4000
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
-; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo
-; GFX1100-NEXT: v_pack_b32_f16 v1, v1, v2
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_pk_mul_f16 v0, v0, v1
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_v2f16_test3:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_v2f16_test3:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
+; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v2
+; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_v2f16_test3:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0x3c00
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v6, 0x4000
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
+; GFX9-SDAG-NEXT: v_pack_b32_f16 v1, v1, v2
+; GFX9-SDAG-NEXT: v_pk_mul_f16 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_v2f16_test3:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xffff8000
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x7fff
+; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v3, v4
+; GFX9-GISEL-NEXT: v_med3_i32 v2, v2, v3, v4
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v1, v0, v1
+; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: fmul_select_v2f16_test3:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_mov_b32_e32 v5, 0x4000
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo
+; GFX10-SDAG-NEXT: v_pack_b32_f16 v1, v1, v2
+; GFX10-SDAG-NEXT: v_pk_mul_f16 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: fmul_select_v2f16_test3:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v3
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v1, v0, v1
+; GFX10-GISEL-NEXT: v_med3_i32 v2, 0xffff8000, v2, v3
+; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: fmul_select_v2f16_test3:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v5, 0x4000
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo
+; GFX11-SDAG-NEXT: v_pack_b32_f16 v1, v1, v2
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_pk_mul_f16 v0, v0, v1
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: fmul_select_v2f16_test3:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v3
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_med3_i32 v2, 0xffff8000, v2, v3
+; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v4, v2
+; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
%y = select <2 x i1> %bool, <2 x half> <half 2.000000e+00, half 2.000000e+00>, <2 x half> <half 1.000000e+00, half 1.000000e+00>
%ldexp = fmul <2 x half> %x, %y
@@ -1349,59 +1877,126 @@ define <2 x half> @fmul_select_v2f16_test3(<2 x half> %x, <2 x i32> %bool.arg1,
}
define <2 x half> @fmul_select_v2f16_test4(<2 x half> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
-; GFX7-LABEL: fmul_select_v2f16_test4:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX7-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
-; GFX7-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc
-; GFX7-NEXT: v_mul_f32_e32 v1, v1, v3
-; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: fmul_select_v2f16_test4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x3c00
-; GFX9-NEXT: v_mov_b32_e32 v6, 0x3800
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
-; GFX9-NEXT: v_pack_b32_f16 v1, v1, v2
-; GFX9-NEXT: v_pk_mul_f16 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_v2f16_test4:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_mov_b32_e32 v5, 0x3800
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
-; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
-; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo
-; GFX1030-NEXT: v_pack_b32_f16 v1, v1, v2
-; GFX1030-NEXT: v_pk_mul_f16 v0, v0, v1
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_v2f16_test4:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_mov_b32_e32 v5, 0x3800
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
-; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo
-; GFX1100-NEXT: v_pack_b32_f16 v1, v1, v2
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_pk_mul_f16 v0, v0, v1
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_v2f16_test4:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_v2f16_test4:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc
+; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v2
+; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_v2f16_test4:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0x3c00
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v6, 0x3800
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
+; GFX9-SDAG-NEXT: v_pack_b32_f16 v1, v1, v2
+; GFX9-SDAG-NEXT: v_pk_mul_f16 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_v2f16_test4:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xffff8000
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x7fff
+; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v3, v4
+; GFX9-GISEL-NEXT: v_med3_i32 v2, v2, v3, v4
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v1, v0, v1
+; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: fmul_select_v2f16_test4:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_mov_b32_e32 v5, 0x3800
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo
+; GFX10-SDAG-NEXT: v_pack_b32_f16 v1, v1, v2
+; GFX10-SDAG-NEXT: v_pk_mul_f16 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: fmul_select_v2f16_test4:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v3
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v1, v0, v1
+; GFX10-GISEL-NEXT: v_med3_i32 v2, 0xffff8000, v2, v3
+; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: fmul_select_v2f16_test4:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v5, 0x3800
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo
+; GFX11-SDAG-NEXT: v_pack_b32_f16 v1, v1, v2
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_pk_mul_f16 v0, v0, v1
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: fmul_select_v2f16_test4:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v3
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_med3_i32 v2, 0xffff8000, v2, v3
+; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v4, v2
+; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
%y = select <2 x i1> %bool, <2 x half> <half 5.000000e-01, half 5.000000e-01>, <2 x half> <half 1.000000e+00, half 1.000000e+00>
%ldexp = fmul <2 x half> %x, %y
@@ -1409,15 +2004,25 @@ define <2 x half> @fmul_select_v2f16_test4(<2 x half> %x, <2 x i32> %bool.arg1,
}
define half @fmul_select_f16_test5(half %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX7-LABEL: fmul_select_f16_test5:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX7-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_f16_test5:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_f16_test5:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc
+; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: fmul_select_f16_test5:
; GFX9: ; %bb.0:
@@ -1427,22 +2032,22 @@ define half @fmul_select_f16_test5(half %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX9-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f16_test5:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc_lo
-; GFX1030-NEXT: v_ldexp_f16_e32 v0, v0, v1
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f16_test5:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1100-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_ldexp_f16_e32 v0, v0, v1
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: fmul_select_f16_test5:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc_lo
+; GFX10-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fmul_select_f16_test5:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, half 2.000000e+00, half 8.000000e+00
%ldexp = fmul half %x, %y
@@ -1450,46 +2055,88 @@ define half @fmul_select_f16_test5(half %x, i32 %bool.arg1, i32 %bool.arg2) {
}
define half @fmul_select_f16_test6(half %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX7-LABEL: fmul_select_f16_test6:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_mov_b32_e32 v3, 0x40400000
-; GFX7-NEXT: v_mov_b32_e32 v4, 0xc1000000
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: fmul_select_f16_test6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x4200
-; GFX9-NEXT: v_mov_b32_e32 v4, 0xc800
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_f16_test6:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_mov_b32_e32 v3, 0xc800
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x4200, v3, vcc_lo
-; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f16_test6:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_mov_b32_e32 v3, 0xc800
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x4200, v3, vcc_lo
-; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_f16_test6:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_mov_b32_e32 v3, 0x40400000
+; GFX7-SDAG-NEXT: v_mov_b32_e32 v4, 0xc1000000
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_f16_test6:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v3, 0xc800
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0x4200
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_f16_test6:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x4200
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0xc800
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-SDAG-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_f16_test6:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xc800
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x4200
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
+; GFX9-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: fmul_select_f16_test6:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0xc800
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x4200, v3, vcc_lo
+; GFX10-SDAG-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: fmul_select_f16_test6:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x4200
+; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xc800, vcc_lo
+; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: fmul_select_f16_test6:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 0xc800
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x4200, v3, vcc_lo
+; GFX11-SDAG-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: fmul_select_f16_test6:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x4200
+; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xc800, vcc_lo
+; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, half -8.000000e+00, half 3.000000e+00
%ldexp = fmul half %x, %y
@@ -1497,45 +2144,87 @@ define half @fmul_select_f16_test6(half %x, i32 %bool.arg1, i32 %bool.arg2) {
}
define half @fmul_select_f16_test7(half %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX7-LABEL: fmul_select_f16_test7:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_mov_b32_e32 v3, 0x41000000
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX7-NEXT: v_cndmask_b32_e32 v1, -4.0, v3, vcc
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: fmul_select_f16_test7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, 0xc400
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x4800
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_f16_test7:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4800
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xc400, v3, vcc_lo
-; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f16_test7:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_mov_b32_e32 v3, 0x4800
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xc400, v3, vcc_lo
-; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_f16_test7:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_mov_b32_e32 v3, 0x41000000
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v1, -4.0, v3, vcc
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_f16_test7:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v3, 0x4800
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0xc400
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_f16_test7:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xc400
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x4800
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-SDAG-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_f16_test7:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x4800
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xc400
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
+; GFX9-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: fmul_select_f16_test7:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0x4800
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0xc400, v3, vcc_lo
+; GFX10-SDAG-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: fmul_select_f16_test7:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0xc400
+; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x4800, vcc_lo
+; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: fmul_select_f16_test7:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 0x4800
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0xc400, v3, vcc_lo
+; GFX11-SDAG-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: fmul_select_f16_test7:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0xc400
+; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x4800, vcc_lo
+; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, half 8.000000e+00, half -4.000000e+00
%ldexp = fmul half %x, %y
@@ -1543,16 +2232,28 @@ define half @fmul_select_f16_test7(half %x, i32 %bool.arg1, i32 %bool.arg2) {
}
define half @fmul_select_f16_test8(half %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX7-LABEL: fmul_select_f16_test8:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_bfrev_b32_e32 v3, 1
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_f16_test8:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_bfrev_b32_e32 v3, 1
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_f16_test8:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v3, 0x8000
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: fmul_select_f16_test8:
; GFX9: ; %bb.0:
@@ -1563,22 +2264,22 @@ define half @fmul_select_f16_test8(half %x, i32 %bool.arg1, i32 %bool.arg2) {
; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1030-LABEL: fmul_select_f16_test8:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0, 0x8000, vcc_lo
-; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f16_test8:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 0x8000, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: fmul_select_f16_test8:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x8000, vcc_lo
+; GFX10-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fmul_select_f16_test8:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x8000, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, half -0.000000e+00, half 0.000000e+00
%ldexp = fmul half %x, %y
@@ -1586,40 +2287,87 @@ define half @fmul_select_f16_test8(half %x, i32 %bool.arg1, i32 %bool.arg2) {
}
define half @fmul_select_f16_test9(half %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX7-LABEL: fmul_select_f16_test9:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e64 v0, -v0
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX7-NEXT: v_cndmask_b32_e64 v1, 5, 4, vcc
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: fmul_select_f16_test9:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX9-NEXT: v_cndmask_b32_e64 v1, 5, 4, vcc
-; GFX9-NEXT: v_ldexp_f16_e64 v0, -v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_f16_test9:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_cndmask_b32_e64 v1, 5, 4, vcc_lo
-; GFX1030-NEXT: v_ldexp_f16_e64 v0, -v0, v1
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f16_test9:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1100-NEXT: v_cndmask_b32_e64 v1, 5, 4, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_ldexp_f16_e64 v0, -v0, v1
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_f16_test9:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, 5, 4, vcc
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_f16_test9:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
+; GFX7-GISEL-NEXT: v_add_i32_e32 v1, vcc, 5, v1
+; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_f16_test9:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v1, 5, 4, vcc
+; GFX9-SDAG-NEXT: v_ldexp_f16_e64 v0, -v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_f16_test9:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
+; GFX9-GISEL-NEXT: v_add_u32_e32 v1, 5, v1
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff8000
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
+; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v2, v3
+; GFX9-GISEL-NEXT: v_ldexp_f16_e64 v0, -v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: fmul_select_f16_test9:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v1, 5, 4, vcc_lo
+; GFX10-SDAG-NEXT: v_ldexp_f16_e64 v0, -v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: fmul_select_f16_test9:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 5, v1
+; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
+; GFX10-GISEL-NEXT: v_ldexp_f16_e64 v0, -v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: fmul_select_f16_test9:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v1, 5, 4, vcc_lo
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_ldexp_f16_e64 v0, -v0, v1
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: fmul_select_f16_test9:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, 5, v1
+; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_ldexp_f16_e64 v0, -v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, half -1.600000e+01, half -3.200000e+01
%ldexp = fmul half %x, %y
@@ -1627,47 +2375,82 @@ define half @fmul_select_f16_test9(half %x, i32 %bool.arg1, i32 %bool.arg2) {
}
define half @fmul_select_f16_test10_sel_log2val_neg11_pos11(half %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX7-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX7-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX9-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc
-; GFX9-NEXT: s_movk_i32 s4, 0x8000
-; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fff
-; GFX9-NEXT: v_med3_i32 v1, v1, s4, v2
-; GFX9-NEXT: v_ldexp_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: s_movk_i32 s4, 0x8000
-; GFX1030-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc_lo
-; GFX1030-NEXT: v_med3_i32 v1, v1, s4, 0x7fff
-; GFX1030-NEXT: v_ldexp_f16_e32 v0, v0, v1
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1100-NEXT: s_movk_i32 s0, 0x8000
-; GFX1100-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_med3_i32 v1, v1, s0, 0x7fff
-; GFX1100-NEXT: v_ldexp_f16_e32 v0, v0, v1
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc
+; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc
+; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x8000
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0x7fff
+; GFX9-SDAG-NEXT: v_med3_i32 v1, v1, s4, v2
+; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-SDAG-NEXT: s_movk_i32 s4, 0x8000
+; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc_lo
+; GFX10-SDAG-NEXT: v_med3_i32 v1, v1, s4, 0x7fff
+; GFX10-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000
+; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc_lo
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_med3_i32 v1, v1, s0, 0x7fff
+; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc_lo
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, half 0xH1000, half 0xH6800
%ldexp = fmul half %x, %y
@@ -1675,47 +2458,82 @@ define half @fmul_select_f16_test10_sel_log2val_neg11_pos11(half %x, i32 %bool.a
}
define half @fmul_select_f16_test11_sel_log2val_pos7_neg14(half %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX7-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX7-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX9-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc
-; GFX9-NEXT: s_movk_i32 s4, 0x8000
-; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fff
-; GFX9-NEXT: v_med3_i32 v1, v1, s4, v2
-; GFX9-NEXT: v_ldexp_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: s_movk_i32 s4, 0x8000
-; GFX1030-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc_lo
-; GFX1030-NEXT: v_med3_i32 v1, v1, s4, 0x7fff
-; GFX1030-NEXT: v_ldexp_f16_e32 v0, v0, v1
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1100-NEXT: s_movk_i32 s0, 0x8000
-; GFX1100-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_med3_i32 v1, v1, s0, 0x7fff
-; GFX1100-NEXT: v_ldexp_f16_e32 v0, v0, v1
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc
+; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc
+; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc
+; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x8000
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0x7fff
+; GFX9-SDAG-NEXT: v_med3_i32 v1, v1, s4, v2
+; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-SDAG-NEXT: s_movk_i32 s4, 0x8000
+; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc_lo
+; GFX10-SDAG-NEXT: v_med3_i32 v1, v1, s4, 0x7fff
+; GFX10-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000
+; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc_lo
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_med3_i32 v1, v1, s0, 0x7fff
+; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc_lo
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, half 0xH5800, half 0xH0400
%ldexp = fmul half %x, %y
@@ -1723,72 +2541,114 @@ define half @fmul_select_f16_test11_sel_log2val_pos7_neg14(half %x, i32 %bool.ar
}
define bfloat @fmul_select_bf16_test1(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX7-LABEL: fmul_select_bf16_test1:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX7-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc
-; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: fmul_select_bf16_test1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x3f80
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x4000
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1
-; GFX9-NEXT: s_movk_i32 s4, 0x7fff
-; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4
-; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0
-; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_bf16_test1:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4000
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v3, vcc_lo
-; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1
-; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0
-; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
-; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
-; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_bf16_test1:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_dual_mov_b32 v3, 0x4000 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v3, vcc_lo
-; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1
-; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0
-; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
-; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_bf16_test1:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc
+; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_bf16_test1:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_bf16_test1:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x3f80
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x4000
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7fff
+; GFX9-SDAG-NEXT: v_add3_u32 v1, v1, v0, s4
+; GFX9-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX9-SDAG-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
+; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_bf16_test1:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff8000
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
+; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v2, v3
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: fmul_select_bf16_test1:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0x4000
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v3, vcc_lo
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX10-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX10-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX10-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: fmul_select_bf16_test1:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: fmul_select_bf16_test1:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 0x4000 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v3, vcc_lo
+; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX11-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX11-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX11-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: fmul_select_bf16_test1:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, bfloat 2.000000e+00, bfloat 1.000000e+00
%ldexp = fmul bfloat %x, %y
@@ -1796,72 +2656,114 @@ define bfloat @fmul_select_bf16_test1(bfloat %x, i32 %bool.arg1, i32 %bool.arg2)
}
define bfloat @fmul_select_bf16_test2(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX7-LABEL: fmul_select_bf16_test2:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX7-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc
-; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: fmul_select_bf16_test2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x3f80
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x3f00
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1
-; GFX9-NEXT: s_movk_i32 s4, 0x7fff
-; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4
-; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0
-; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_bf16_test2:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_mov_b32_e32 v3, 0x3f00
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v3, vcc_lo
-; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1
-; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0
-; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
-; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
-; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_bf16_test2:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_dual_mov_b32 v3, 0x3f00 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v3, vcc_lo
-; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1
-; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0
-; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
-; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_bf16_test2:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc
+; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_bf16_test2:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
+; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_bf16_test2:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x3f80
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x3f00
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7fff
+; GFX9-SDAG-NEXT: v_add3_u32 v1, v1, v0, s4
+; GFX9-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX9-SDAG-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
+; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_bf16_test2:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff8000
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
+; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v2, v3
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: fmul_select_bf16_test2:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0x3f00
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v3, vcc_lo
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX10-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX10-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX10-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: fmul_select_bf16_test2:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: fmul_select_bf16_test2:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 0x3f00 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v3, vcc_lo
+; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX11-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX11-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX11-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: fmul_select_bf16_test2:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, bfloat 5.000000e-01, bfloat 1.000000e+00
%ldexp = fmul bfloat %x, %y
@@ -1869,111 +2771,158 @@ define bfloat @fmul_select_bf16_test2(bfloat %x, i32 %bool.arg1, i32 %bool.arg2)
}
define <2 x bfloat> @fmul_select_v2bf16_test3(<2 x bfloat> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
-; GFX7-LABEL: fmul_select_v2bf16_test3:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX7-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX7-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc
-; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
-; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: v_mul_f32_e32 v1, v1, v3
-; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: fmul_select_v2bf16_test3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x3f80
-; GFX9-NEXT: v_mov_b32_e32 v6, 0x4000
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
-; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v0
-; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX9-NEXT: v_mul_f32_e32 v1, v3, v1
-; GFX9-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX9-NEXT: v_bfe_u32 v3, v1, 16, 1
-; GFX9-NEXT: s_movk_i32 s4, 0x7fff
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX9-NEXT: v_add3_u32 v3, v3, v1, s4
-; GFX9-NEXT: v_or_b32_e32 v4, 0x400000, v1
-; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v1, v1
-; GFX9-NEXT: v_bfe_u32 v2, v0, 16, 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT: v_add3_u32 v2, v2, v0, s4
-; GFX9-NEXT: v_or_b32_e32 v3, 0x400000, v0
-; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x7060302
-; GFX9-NEXT: v_perm_b32 v0, v0, v1, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_v2bf16_test3:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_mov_b32_e32 v5, 0x4000
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
-; GFX1030-NEXT: v_lshlrev_b32_e32 v3, 16, v0
-; GFX1030-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v5, vcc_lo
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
-; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0x3f80, v5, vcc_lo
-; GFX1030-NEXT: v_mul_f32_e32 v1, v3, v1
-; GFX1030-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX1030-NEXT: v_or_b32_e32 v4, 0x400000, v1
-; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX1030-NEXT: v_bfe_u32 v2, v1, 16, 1
-; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
-; GFX1030-NEXT: v_bfe_u32 v3, v0, 16, 1
-; GFX1030-NEXT: v_add3_u32 v2, v2, v1, 0x7fff
-; GFX1030-NEXT: v_or_b32_e32 v5, 0x400000, v0
-; GFX1030-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
-; GFX1030-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo
-; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX1030-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo
-; GFX1030-NEXT: v_perm_b32 v0, v0, v1, 0x7060302
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_v2bf16_test3:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_mov_b32_e32 v5, 0x4000
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
-; GFX1100-NEXT: v_lshlrev_b32_e32 v3, 16, v0
-; GFX1100-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v5, vcc_lo
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
-; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0x3f80, v5, vcc_lo
-; GFX1100-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_lshlrev_b32 v1, 16, v1
-; GFX1100-NEXT: v_or_b32_e32 v5, 0x400000, v0
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-NEXT: v_mul_f32_e32 v1, v3, v1
-; GFX1100-NEXT: v_bfe_u32 v3, v0, 16, 1
-; GFX1100-NEXT: v_bfe_u32 v2, v1, 16, 1
-; GFX1100-NEXT: v_or_b32_e32 v4, 0x400000, v1
-; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1100-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
-; GFX1100-NEXT: v_add3_u32 v2, v2, v1, 0x7fff
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX1100-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo
-; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX1100-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_perm_b32 v0, v0, v1, 0x7060302
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_v2bf16_test3:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc
+; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
+; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_v2bf16_test3:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
+; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v2
+; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_v2bf16_test3:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0x3f80
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v6, 0x4000
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v3, 16, v0
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX9-SDAG-NEXT: v_mul_f32_e32 v1, v3, v1
+; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX9-SDAG-NEXT: v_bfe_u32 v3, v1, 16, 1
+; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7fff
+; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX9-SDAG-NEXT: v_add3_u32 v3, v3, v1, s4
+; GFX9-SDAG-NEXT: v_or_b32_e32 v4, 0x400000, v1
+; GFX9-SDAG-NEXT: v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT: v_bfe_u32 v2, v0, 16, 1
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-SDAG-NEXT: v_add3_u32 v2, v2, v0, s4
+; GFX9-SDAG-NEXT: v_or_b32_e32 v3, 0x400000, v0
+; GFX9-SDAG-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX9-SDAG-NEXT: s_mov_b32 s4, 0x7060302
+; GFX9-SDAG-NEXT: v_perm_b32 v0, v0, v1, s4
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_v2bf16_test3:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff8000
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
+; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v2, v3
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: fmul_select_v2bf16_test3:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_mov_b32_e32 v5, 0x4000
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v3, 16, v0
+; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v5, vcc_lo
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v2, 0x3f80, v5, vcc_lo
+; GFX10-SDAG-NEXT: v_mul_f32_e32 v1, v3, v1
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX10-SDAG-NEXT: v_or_b32_e32 v4, 0x400000, v1
+; GFX10-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX10-SDAG-NEXT: v_bfe_u32 v2, v1, 16, 1
+; GFX10-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-SDAG-NEXT: v_bfe_u32 v3, v0, 16, 1
+; GFX10-SDAG-NEXT: v_add3_u32 v2, v2, v1, 0x7fff
+; GFX10-SDAG-NEXT: v_or_b32_e32 v5, 0x400000, v0
+; GFX10-SDAG-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo
+; GFX10-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo
+; GFX10-SDAG-NEXT: v_perm_b32 v0, v0, v1, 0x7060302
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: fmul_select_v2bf16_test3:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: fmul_select_v2bf16_test3:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v5, 0x4000
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
+; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v3, 16, v0
+; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v5, vcc_lo
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v2, 0x3f80, v5, vcc_lo
+; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_lshlrev_b32 v1, 16, v1
+; GFX11-SDAG-NEXT: v_or_b32_e32 v5, 0x400000, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_mul_f32_e32 v1, v3, v1
+; GFX11-SDAG-NEXT: v_bfe_u32 v3, v0, 16, 1
+; GFX11-SDAG-NEXT: v_bfe_u32 v2, v1, 16, 1
+; GFX11-SDAG-NEXT: v_or_b32_e32 v4, 0x400000, v1
+; GFX11-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
+; GFX11-SDAG-NEXT: v_add3_u32 v2, v2, v1, 0x7fff
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo
+; GFX11-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_perm_b32 v0, v0, v1, 0x7060302
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: fmul_select_v2bf16_test3:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
%y = select <2 x i1> %bool, <2 x bfloat> <bfloat 2.000000e+00, bfloat 2.000000e+00>, <2 x bfloat> <bfloat 1.000000e+00, bfloat 1.000000e+00>
%ldexp = fmul <2 x bfloat> %x, %y
@@ -1981,111 +2930,158 @@ define <2 x bfloat> @fmul_select_v2bf16_test3(<2 x bfloat> %x, <2 x i32> %bool.a
}
define <2 x bfloat> @fmul_select_v2bf16_test4(<2 x bfloat> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) {
-; GFX7-LABEL: fmul_select_v2bf16_test4:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX7-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
-; GFX7-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc
-; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
-; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: v_mul_f32_e32 v1, v1, v3
-; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: fmul_select_v2bf16_test4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v5, 0x3f80
-; GFX9-NEXT: v_mov_b32_e32 v6, 0x3f00
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
-; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
-; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v0
-; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX9-NEXT: v_mul_f32_e32 v1, v3, v1
-; GFX9-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX9-NEXT: v_bfe_u32 v3, v1, 16, 1
-; GFX9-NEXT: s_movk_i32 s4, 0x7fff
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX9-NEXT: v_add3_u32 v3, v3, v1, s4
-; GFX9-NEXT: v_or_b32_e32 v4, 0x400000, v1
-; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v1, v1
-; GFX9-NEXT: v_bfe_u32 v2, v0, 16, 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT: v_add3_u32 v2, v2, v0, s4
-; GFX9-NEXT: v_or_b32_e32 v3, 0x400000, v0
-; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
-; GFX9-NEXT: s_mov_b32 s4, 0x7060302
-; GFX9-NEXT: v_perm_b32 v0, v0, v1, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_v2bf16_test4:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_mov_b32_e32 v5, 0x3f00
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
-; GFX1030-NEXT: v_lshlrev_b32_e32 v3, 16, v0
-; GFX1030-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v5, vcc_lo
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
-; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0x3f80, v5, vcc_lo
-; GFX1030-NEXT: v_mul_f32_e32 v1, v3, v1
-; GFX1030-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX1030-NEXT: v_or_b32_e32 v4, 0x400000, v1
-; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v2
-; GFX1030-NEXT: v_bfe_u32 v2, v1, 16, 1
-; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
-; GFX1030-NEXT: v_bfe_u32 v3, v0, 16, 1
-; GFX1030-NEXT: v_add3_u32 v2, v2, v1, 0x7fff
-; GFX1030-NEXT: v_or_b32_e32 v5, 0x400000, v0
-; GFX1030-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
-; GFX1030-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo
-; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX1030-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo
-; GFX1030-NEXT: v_perm_b32 v0, v0, v1, 0x7060302
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_v2bf16_test4:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_mov_b32_e32 v5, 0x3f00
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
-; GFX1100-NEXT: v_lshlrev_b32_e32 v3, 16, v0
-; GFX1100-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v5, vcc_lo
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
-; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0x3f80, v5, vcc_lo
-; GFX1100-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_lshlrev_b32 v1, 16, v1
-; GFX1100-NEXT: v_or_b32_e32 v5, 0x400000, v0
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-NEXT: v_mul_f32_e32 v1, v3, v1
-; GFX1100-NEXT: v_bfe_u32 v3, v0, 16, 1
-; GFX1100-NEXT: v_bfe_u32 v2, v1, 16, 1
-; GFX1100-NEXT: v_or_b32_e32 v4, 0x400000, v1
-; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1100-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
-; GFX1100-NEXT: v_add3_u32 v2, v2, v1, 0x7fff
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX1100-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo
-; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX1100-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_perm_b32 v0, v0, v1, 0x7060302
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_v2bf16_test4:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc
+; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
+; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_v2bf16_test4:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc
+; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v2
+; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_v2bf16_test4:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0x3f80
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v6, 0x3f00
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v3, 16, v0
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX9-SDAG-NEXT: v_mul_f32_e32 v1, v3, v1
+; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX9-SDAG-NEXT: v_bfe_u32 v3, v1, 16, 1
+; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7fff
+; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX9-SDAG-NEXT: v_add3_u32 v3, v3, v1, s4
+; GFX9-SDAG-NEXT: v_or_b32_e32 v4, 0x400000, v1
+; GFX9-SDAG-NEXT: v_cmp_u_f32_e32 vcc, v1, v1
+; GFX9-SDAG-NEXT: v_bfe_u32 v2, v0, 16, 1
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-SDAG-NEXT: v_add3_u32 v2, v2, v0, s4
+; GFX9-SDAG-NEXT: v_or_b32_e32 v3, 0x400000, v0
+; GFX9-SDAG-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
+; GFX9-SDAG-NEXT: s_mov_b32 s4, 0x7060302
+; GFX9-SDAG-NEXT: v_perm_b32 v0, v0, v1, s4
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_v2bf16_test4:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff8000
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
+; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v2, v3
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: fmul_select_v2bf16_test4:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_mov_b32_e32 v5, 0x3f00
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v3, 16, v0
+; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v5, vcc_lo
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v2, 0x3f80, v5, vcc_lo
+; GFX10-SDAG-NEXT: v_mul_f32_e32 v1, v3, v1
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX10-SDAG-NEXT: v_or_b32_e32 v4, 0x400000, v1
+; GFX10-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX10-SDAG-NEXT: v_bfe_u32 v2, v1, 16, 1
+; GFX10-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX10-SDAG-NEXT: v_bfe_u32 v3, v0, 16, 1
+; GFX10-SDAG-NEXT: v_add3_u32 v2, v2, v1, 0x7fff
+; GFX10-SDAG-NEXT: v_or_b32_e32 v5, 0x400000, v0
+; GFX10-SDAG-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo
+; GFX10-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo
+; GFX10-SDAG-NEXT: v_perm_b32 v0, v0, v1, 0x7060302
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: fmul_select_v2bf16_test4:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: fmul_select_v2bf16_test4:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v5, 0x3f00
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
+; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v3, 16, v0
+; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v5, vcc_lo
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v2, 0x3f80, v5, vcc_lo
+; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_lshlrev_b32 v1, 16, v1
+; GFX11-SDAG-NEXT: v_or_b32_e32 v5, 0x400000, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_mul_f32_e32 v1, v3, v1
+; GFX11-SDAG-NEXT: v_bfe_u32 v3, v0, 16, 1
+; GFX11-SDAG-NEXT: v_bfe_u32 v2, v1, 16, 1
+; GFX11-SDAG-NEXT: v_or_b32_e32 v4, 0x400000, v1
+; GFX11-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
+; GFX11-SDAG-NEXT: v_add3_u32 v2, v2, v1, 0x7fff
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo
+; GFX11-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_perm_b32 v0, v0, v1, 0x7060302
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: fmul_select_v2bf16_test4:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2
%y = select <2 x i1> %bool, <2 x bfloat> <bfloat 5.000000e-01, bfloat 5.000000e-01>, <2 x bfloat> <bfloat 1.000000e+00, bfloat 1.000000e+00>
%ldexp = fmul <2 x bfloat> %x, %y
@@ -2093,73 +3089,108 @@ define <2 x bfloat> @fmul_select_v2bf16_test4(<2 x bfloat> %x, <2 x i32> %bool.a
}
define bfloat @fmul_select_bf16_test5(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX7-LABEL: fmul_select_bf16_test5:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: v_mov_b32_e32 v3, 0x41000000
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v3, 2.0, vcc
-; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: fmul_select_bf16_test5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x4100
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x4000
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1
-; GFX9-NEXT: s_movk_i32 s4, 0x7fff
-; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4
-; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0
-; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_bf16_test5:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4000
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x4100, v3, vcc_lo
-; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1
-; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0
-; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
-; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
-; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_bf16_test5:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_dual_mov_b32 v3, 0x4000 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x4100, v3, vcc_lo
-; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1
-; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0
-; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
-; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_bf16_test5:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: v_mov_b32_e32 v3, 0x41000000
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, v3, 2.0, vcc
+; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_bf16_test5:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc
+; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_bf16_test5:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x4100
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x4000
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7fff
+; GFX9-SDAG-NEXT: v_add3_u32 v1, v1, v0, s4
+; GFX9-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX9-SDAG-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
+; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_bf16_test5:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: fmul_select_bf16_test5:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0x4000
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x4100, v3, vcc_lo
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX10-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX10-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX10-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: fmul_select_bf16_test5:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: fmul_select_bf16_test5:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 0x4000 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x4100, v3, vcc_lo
+; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX11-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX11-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX11-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: fmul_select_bf16_test5:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc_lo
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, bfloat 2.000000e+00, bfloat 8.000000e+00
%ldexp = fmul bfloat %x, %y
@@ -2167,74 +3198,116 @@ define bfloat @fmul_select_bf16_test5(bfloat %x, i32 %bool.arg1, i32 %bool.arg2)
}
define bfloat @fmul_select_bf16_test6(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX7-LABEL: fmul_select_bf16_test6:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: v_mov_b32_e32 v3, 0x40400000
-; GFX7-NEXT: v_mov_b32_e32 v4, 0xc1000000
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: fmul_select_bf16_test6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x4040
-; GFX9-NEXT: v_mov_b32_e32 v4, 0xffffc100
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1
-; GFX9-NEXT: s_movk_i32 s4, 0x7fff
-; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4
-; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0
-; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_bf16_test6:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_mov_b32_e32 v3, 0xffffc100
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x4040, v3, vcc_lo
-; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1
-; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0
-; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
-; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
-; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_bf16_test6:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_dual_mov_b32 v3, 0xffffc100 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x4040, v3, vcc_lo
-; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1
-; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0
-; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
-; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_bf16_test6:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: v_mov_b32_e32 v3, 0x40400000
+; GFX7-SDAG-NEXT: v_mov_b32_e32 v4, 0xc1000000
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_bf16_test6:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v3, 0xc100
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0x4040
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_bf16_test6:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x4040
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffc100
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7fff
+; GFX9-SDAG-NEXT: v_add3_u32 v1, v1, v0, s4
+; GFX9-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX9-SDAG-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
+; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_bf16_test6:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xc100
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x4040
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
+; GFX9-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: fmul_select_bf16_test6:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0xffffc100
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x4040, v3, vcc_lo
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX10-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX10-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX10-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: fmul_select_bf16_test6:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x4040
+; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xc100, vcc_lo
+; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: fmul_select_bf16_test6:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 0xffffc100 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x4040, v3, vcc_lo
+; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX11-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX11-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX11-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: fmul_select_bf16_test6:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x4040
+; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xc100, vcc_lo
+; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, bfloat -8.000000e+00, bfloat 3.000000e+00
%ldexp = fmul bfloat %x, %y
@@ -2242,73 +3315,115 @@ define bfloat @fmul_select_bf16_test6(bfloat %x, i32 %bool.arg1, i32 %bool.arg2)
}
define bfloat @fmul_select_bf16_test7(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX7-LABEL: fmul_select_bf16_test7:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: v_mov_b32_e32 v3, 0x41000000
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX7-NEXT: v_cndmask_b32_e32 v1, -4.0, v3, vcc
-; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: fmul_select_bf16_test7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, 0xffffc080
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x4100
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1
-; GFX9-NEXT: s_movk_i32 s4, 0x7fff
-; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4
-; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0
-; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_bf16_test7:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4100
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xffffc080, v3, vcc_lo
-; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1
-; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0
-; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
-; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
-; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_bf16_test7:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_dual_mov_b32 v3, 0x4100 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xffffc080, v3, vcc_lo
-; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1
-; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0
-; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
-; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_bf16_test7:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: v_mov_b32_e32 v3, 0x41000000
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v1, -4.0, v3, vcc
+; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_bf16_test7:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v3, 0x4100
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0xc080
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_bf16_test7:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xffffc080
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x4100
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7fff
+; GFX9-SDAG-NEXT: v_add3_u32 v1, v1, v0, s4
+; GFX9-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX9-SDAG-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
+; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_bf16_test7:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x4100
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xc080
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
+; GFX9-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: fmul_select_bf16_test7:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0x4100
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0xffffc080, v3, vcc_lo
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX10-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX10-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX10-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: fmul_select_bf16_test7:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0xc080
+; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x4100, vcc_lo
+; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: fmul_select_bf16_test7:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 0x4100 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0xffffc080, v3, vcc_lo
+; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX11-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX11-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX11-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: fmul_select_bf16_test7:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0xc080
+; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x4100, vcc_lo
+; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, bfloat 8.000000e+00, bfloat -4.000000e+00
%ldexp = fmul bfloat %x, %y
@@ -2316,73 +3431,111 @@ define bfloat @fmul_select_bf16_test7(bfloat %x, i32 %bool.arg1, i32 %bool.arg2)
}
define bfloat @fmul_select_bf16_test8(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX7-LABEL: fmul_select_bf16_test8:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: v_lshlrev_b32_e32 v1, 31, v1
-; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: fmul_select_bf16_test8:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
-; GFX9-NEXT: v_mov_b32_e32 v2, 15
-; GFX9-NEXT: v_lshlrev_b16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1
-; GFX9-NEXT: s_movk_i32 s4, 0x7fff
-; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4
-; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0
-; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_bf16_test8:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
-; GFX1030-NEXT: v_lshlrev_b16 v1, 15, v1
-; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1
-; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0
-; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
-; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
-; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_bf16_test8:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1100-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_lshlrev_b16 v1, 15, v1
-; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1
-; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0
-; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
-; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_bf16_test8:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v1, 31, v1
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_bf16_test8:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v3, 0x8000
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_bf16_test8:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 15
+; GFX9-SDAG-NEXT: v_lshlrev_b16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7fff
+; GFX9-SDAG-NEXT: v_add3_u32 v1, v1, v0, s4
+; GFX9-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX9-SDAG-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
+; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_bf16_test8:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x8000
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; GFX9-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: fmul_select_bf16_test8:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 15, v1
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX10-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX10-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX10-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: fmul_select_bf16_test8:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x8000, vcc_lo
+; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: fmul_select_bf16_test8:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_lshlrev_b16 v1, 15, v1
+; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX11-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX11-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX11-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: fmul_select_bf16_test8:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x8000, vcc_lo
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, bfloat -0.000000e+00, bfloat 0.000000e+00
%ldexp = fmul bfloat %x, %y
@@ -2390,74 +3543,121 @@ define bfloat @fmul_select_bf16_test8(bfloat %x, i32 %bool.arg1, i32 %bool.arg2)
}
define bfloat @fmul_select_bf16_test9(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX7-LABEL: fmul_select_bf16_test9:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: v_mov_b32_e32 v3, 0xc2000000
-; GFX7-NEXT: v_mov_b32_e32 v4, 0xc1800000
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: fmul_select_bf16_test9:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, 0xffffc200
-; GFX9-NEXT: v_mov_b32_e32 v4, 0xffffc180
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1
-; GFX9-NEXT: s_movk_i32 s4, 0x7fff
-; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4
-; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0
-; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_bf16_test9:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_mov_b32_e32 v3, 0xffffc180
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xffffc200, v3, vcc_lo
-; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1
-; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0
-; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
-; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
-; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_bf16_test9:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_dual_mov_b32 v3, 0xffffc180 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xffffc200, v3, vcc_lo
-; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1
-; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0
-; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
-; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_bf16_test9:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: v_mov_b32_e32 v3, 0xc2000000
+; GFX7-SDAG-NEXT: v_mov_b32_e32 v4, 0xc1800000
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_bf16_test9:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
+; GFX7-GISEL-NEXT: v_add_i32_e32 v1, vcc, 5, v1
+; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_bf16_test9:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xffffc200
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffc180
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7fff
+; GFX9-SDAG-NEXT: v_add3_u32 v1, v1, v0, s4
+; GFX9-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX9-SDAG-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
+; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_bf16_test9:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
+; GFX9-GISEL-NEXT: v_add_u32_e32 v1, 5, v1
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff8000
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
+; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v2, v3
+; GFX9-GISEL-NEXT: v_ldexp_f16_e64 v0, -v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: fmul_select_bf16_test9:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0xffffc180
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0xffffc200, v3, vcc_lo
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX10-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX10-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX10-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: fmul_select_bf16_test9:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 5, v1
+; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
+; GFX10-GISEL-NEXT: v_ldexp_f16_e64 v0, -v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: fmul_select_bf16_test9:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 0xffffc180 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0xffffc200, v3, vcc_lo
+; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX11-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX11-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX11-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: fmul_select_bf16_test9:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, 5, v1
+; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_ldexp_f16_e64 v0, -v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, bfloat -1.600000e+01, bfloat -3.200000e+01
%ldexp = fmul bfloat %x, %y
@@ -2465,74 +3665,111 @@ define bfloat @fmul_select_bf16_test9(bfloat %x, i32 %bool.arg1, i32 %bool.arg2)
}
define bfloat @fmul_select_bf16_test10_sel_log2val_pos65_pos56(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX7-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: v_mov_b32_e32 v3, 0xdb800000
-; GFX7-NEXT: v_bfrev_b32_e32 v4, 7
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, 0xffffdb80
-; GFX9-NEXT: v_mov_b32_e32 v4, 0xffffe000
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1
-; GFX9-NEXT: s_movk_i32 s4, 0x7fff
-; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4
-; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0
-; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_mov_b32_e32 v3, 0xffffe000
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xffffdb80, v3, vcc_lo
-; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1
-; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0
-; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
-; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
-; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_dual_mov_b32 v3, 0xffffe000 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xffffdb80, v3, vcc_lo
-; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1
-; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0
-; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
-; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: v_mov_b32_e32 v3, 0xdb800000
+; GFX7-SDAG-NEXT: v_bfrev_b32_e32 v4, 7
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v3, 0x41
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v1, 56, v3, vcc
+; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xffffdb80
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffe000
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7fff
+; GFX9-SDAG-NEXT: v_add3_u32 v1, v1, v0, s4
+; GFX9-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX9-SDAG-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
+; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x41
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, 56, v3, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f16_e64 v0, -v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0xffffe000
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0xffffdb80, v3, vcc_lo
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX10-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX10-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX10-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 56, 0x41, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f16_e64 v0, -v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 0xffffe000 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0xffffdb80, v3, vcc_lo
+; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX11-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX11-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX11-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 56, 0x41, vcc_lo
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_ldexp_f16_e64 v0, -v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, bfloat 0xRE000, bfloat 0xRDB80
%ldexp = fmul bfloat %x, %y
@@ -2540,74 +3777,111 @@ define bfloat @fmul_select_bf16_test10_sel_log2val_pos65_pos56(bfloat %x, i32 %b
}
define bfloat @fmul_select_bf16_test11_sel_log2val_neg22_pos25(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) {
-; GFX7-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: v_bfrev_b32_e32 v3, 50
-; GFX7-NEXT: v_mov_b32_e32 v4, 0x34800000
-; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x4c00
-; GFX9-NEXT: v_mov_b32_e32 v4, 0x3480
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1
-; GFX9-NEXT: s_movk_i32 s4, 0x7fff
-; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4
-; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0
-; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
-; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1030-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25:
-; GFX1030: ; %bb.0:
-; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1030-NEXT: v_mov_b32_e32 v3, 0x3480
-; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x4c00, v3, vcc_lo
-; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1
-; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0
-; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
-; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
-; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX1030-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1100-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_dual_mov_b32 v3, 0x3480 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x4c00, v3, vcc_lo
-; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1
-; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0
-; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
-; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX7-SDAG-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: v_bfrev_b32_e32 v3, 50
+; GFX7-SDAG-NEXT: v_mov_b32_e32 v4, 0x34800000
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-GISEL-NEXT: v_not_b32_e32 v3, 21
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v1, 25, v3, vcc
+; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x4c00
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x3480
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7fff
+; GFX9-SDAG-NEXT: v_add3_u32 v1, v1, v0, s4
+; GFX9-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX9-SDAG-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
+; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_not_b32_e32 v3, 21
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, 25, v3, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0x3480
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x4c00, v3, vcc_lo
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX10-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX10-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX10-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX10-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX10-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 25, 0xffffffea, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 0x3480 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x4c00, v3, vcc_lo
+; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX11-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1
+; GFX11-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0
+; GFX11-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
+; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 25, 0xffffffea, vcc_lo
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%bool = icmp eq i32 %bool.arg1, %bool.arg2
%y = select i1 %bool, bfloat 0xR3480, bfloat 0xR4C00
%ldexp = fmul bfloat %x, %y
diff --git a/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll b/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll
index ebfb5e9ccaa35f..a324ba35b155fb 100644
--- a/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll
@@ -1625,14 +1625,12 @@ define float @v_recip_sqrt_f32_ulp25_contract(float %x) {
; CODEGEN-IEEE-GISEL: ; %bb.0:
; CODEGEN-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CODEGEN-IEEE-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; CODEGEN-IEEE-GISEL-NEXT: v_mov_b32_e32 v2, 0x4b800000
; CODEGEN-IEEE-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; CODEGEN-IEEE-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 24, vcc
+; CODEGEN-IEEE-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; CODEGEN-IEEE-GISEL-NEXT: v_rsq_f32_e32 v0, v0
-; CODEGEN-IEEE-GISEL-NEXT: v_mov_b32_e32 v1, 0x45800000
-; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; CODEGEN-IEEE-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 12, vcc
+; CODEGEN-IEEE-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; CODEGEN-IEEE-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; IR-IEEE-SDAG-LABEL: v_recip_sqrt_f32_ulp25_contract:
diff --git a/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll b/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
index 104e157e9e15ae..9ae60f99d5e094 100644
--- a/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
@@ -3307,488 +3307,458 @@ define amdgpu_ps i32 @s_mul_32_f16(half inreg %x, half inreg %y) {
; --------------------------------------------------------------------
define float @v_mul_f32_select_64_1(i32 %arg, float %x) {
-; GFX9-SDAG-LABEL: v_mul_f32_select_64_1:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc
-; GFX9-SDAG-NEXT: v_ldexp_f32 v0, v1, v0
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_mul_f32_select_64_1:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc
-; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: v_mul_f32_select_64_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc
+; GFX9-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-SDAG-LABEL: v_mul_f32_select_64_1:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
-; GFX10-SDAG-NEXT: v_ldexp_f32 v0, v1, v0
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX1011-LABEL: v_mul_f32_select_64_1:
+; GFX1011: ; %bb.0:
+; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1011-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
+; GFX1011-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX1011-NEXT: s_setpc_b64 s[30:31]
+ %cond = icmp eq i32 %arg, 0
+ %select.pow2 = select i1 %cond, float 64.0, float 1.0
+ %mul = fmul float %x, %select.pow2
+ ret float %mul
+}
+
+define float @v_mul_f32_select_1_64(i32 %arg, float %x) {
+; GFX9-LABEL: v_mul_f32_select_1_64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc
+; GFX9-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-GISEL-LABEL: v_mul_f32_select_64_1:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x42800000, vcc_lo
-; GFX10-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX1011-LABEL: v_mul_f32_select_1_64:
+; GFX1011: ; %bb.0:
+; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1011-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
+; GFX1011-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX1011-NEXT: s_setpc_b64 s[30:31]
+ %cond = icmp eq i32 %arg, 0
+ %select.pow2 = select i1 %cond, float 1.0, float 64.0
+ %mul = fmul float %x, %select.pow2
+ ret float %mul
+}
+
+define float @v_mul_f32_select_n1_n64(i32 %arg, float %x) {
+; GFX9-LABEL: v_mul_f32_select_n1_n64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc
+; GFX9-NEXT: v_ldexp_f32 v0, -v1, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: v_mul_f32_select_64_1:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
-; GFX11-SDAG-NEXT: v_ldexp_f32 v0, v1, v0
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX1011-LABEL: v_mul_f32_select_n1_n64:
+; GFX1011: ; %bb.0:
+; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1011-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
+; GFX1011-NEXT: v_ldexp_f32 v0, -v1, v0
+; GFX1011-NEXT: s_setpc_b64 s[30:31]
+ %cond = icmp eq i32 %arg, 0
+ %select.pow2 = select i1 %cond, float -1.0, float -64.0
+ %mul = fmul float %x, %select.pow2
+ ret float %mul
+}
+
+define float @v_mul_f32_select_n64_n1(i32 %arg, float %x) {
+; GFX9-LABEL: v_mul_f32_select_n64_n1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc
+; GFX9-NEXT: v_ldexp_f32 v0, -v1, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: v_mul_f32_select_64_1:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x42800000, vcc_lo
-; GFX11-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX1011-LABEL: v_mul_f32_select_n64_n1:
+; GFX1011: ; %bb.0:
+; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1011-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
+; GFX1011-NEXT: v_ldexp_f32 v0, -v1, v0
+; GFX1011-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
- %select.pow2 = select i1 %cond, float 64.0, float 1.0
+ %select.pow2 = select i1 %cond, float -64.0, float -1.0
%mul = fmul float %x, %select.pow2
ret float %mul
}
-define float @v_mul_f32_select_1_64(i32 %arg, float %x) {
-; GFX9-SDAG-LABEL: v_mul_f32_select_1_64:
+define float @v_mul_f32_select_128_64(i32 %arg, float %x) {
+; GFX9-SDAG-LABEL: v_mul_f32_select_128_64:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc
+; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc
; GFX9-SDAG-NEXT: v_ldexp_f32 v0, v1, v0
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-GISEL-LABEL: v_mul_f32_select_1_64:
+; GFX9-GISEL-LABEL: v_mul_f32_select_128_64:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 1.0, vcc
-; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 6, v0
+; GFX9-GISEL-NEXT: v_ldexp_f32 v0, v1, v0
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-SDAG-LABEL: v_mul_f32_select_1_64:
+; GFX10-SDAG-LABEL: v_mul_f32_select_128_64:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
+; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc_lo
; GFX10-SDAG-NEXT: v_ldexp_f32 v0, v1, v0
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-GISEL-LABEL: v_mul_f32_select_1_64:
+; GFX10-GISEL-LABEL: v_mul_f32_select_128_64:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0x42800000, 1.0, vcc_lo
-; GFX10-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0
+; GFX10-GISEL-NEXT: v_ldexp_f32 v0, v1, v0
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: v_mul_f32_select_1_64:
+; GFX11-SDAG-LABEL: v_mul_f32_select_128_64:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
+; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc_lo
; GFX11-SDAG-NEXT: v_ldexp_f32 v0, v1, v0
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: v_mul_f32_select_1_64:
+; GFX11-GISEL-LABEL: v_mul_f32_select_128_64:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0x42800000, 1.0, vcc_lo
-; GFX11-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0
+; GFX11-GISEL-NEXT: v_ldexp_f32 v0, v1, v0
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
- %select.pow2 = select i1 %cond, float 1.0, float 64.0
+ %select.pow2 = select i1 %cond, float 128.0, float 64.0
%mul = fmul float %x, %select.pow2
ret float %mul
}
-define float @v_mul_f32_select_n1_n64(i32 %arg, float %x) {
-; GFX9-SDAG-LABEL: v_mul_f32_select_n1_n64:
+define float @v_mul_f32_select_n128_n64(i32 %arg, float %x) {
+; GFX9-SDAG-LABEL: v_mul_f32_select_n128_n64:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc
+; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc
; GFX9-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-GISEL-LABEL: v_mul_f32_select_n1_n64:
+; GFX9-GISEL-LABEL: v_mul_f32_select_n128_n64:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2800000
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, -1.0, vcc
-; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 6, v0
+; GFX9-GISEL-NEXT: v_ldexp_f32 v0, -v1, v0
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-SDAG-LABEL: v_mul_f32_select_n1_n64:
+; GFX10-SDAG-LABEL: v_mul_f32_select_n128_n64:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
+; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc_lo
; GFX10-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-GISEL-LABEL: v_mul_f32_select_n1_n64:
+; GFX10-GISEL-LABEL: v_mul_f32_select_n128_n64:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0xc2800000, -1.0, vcc_lo
-; GFX10-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0
+; GFX10-GISEL-NEXT: v_ldexp_f32 v0, -v1, v0
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: v_mul_f32_select_n1_n64:
+; GFX11-SDAG-LABEL: v_mul_f32_select_n128_n64:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
+; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc_lo
; GFX11-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: v_mul_f32_select_n1_n64:
+; GFX11-GISEL-LABEL: v_mul_f32_select_n128_n64:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0xc2800000, -1.0, vcc_lo
-; GFX11-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0
+; GFX11-GISEL-NEXT: v_ldexp_f32 v0, -v1, v0
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
- %select.pow2 = select i1 %cond, float -1.0, float -64.0
+ %select.pow2 = select i1 %cond, float -128.0, float -64.0
%mul = fmul float %x, %select.pow2
ret float %mul
}
-define float @v_mul_f32_select_n64_n1(i32 %arg, float %x) {
-; GFX9-SDAG-LABEL: v_mul_f32_select_n64_n1:
+define float @v_mul_f32_select_n128_n16(i32 %arg, float %x) {
+; GFX9-LABEL: v_mul_f32_select_n128_n16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc
+; GFX9-NEXT: v_ldexp_f32 v0, -v1, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1011-LABEL: v_mul_f32_select_n128_n16:
+; GFX1011: ; %bb.0:
+; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1011-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc_lo
+; GFX1011-NEXT: v_ldexp_f32 v0, -v1, v0
+; GFX1011-NEXT: s_setpc_b64 s[30:31]
+ %cond = icmp eq i32 %arg, 0
+ %select.pow2 = select i1 %cond, float -128.0, float -16.0
+ %mul = fmul float %x, %select.pow2
+ ret float %mul
+}
+
+define float @v_contract_mul_add_f32_select_64_1(i32 %arg, float %x, float %y) {
+; GFX9-SDAG-LABEL: v_contract_mul_add_f32_select_64_1:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000
; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc
-; GFX9-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc
+; GFX9-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-GISEL-LABEL: v_mul_f32_select_n64_n1:
+; GFX9-GISEL-LABEL: v_contract_mul_add_f32_select_64_1:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2800000
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, -1.0, v2, vcc
-; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX9-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-SDAG-LABEL: v_mul_f32_select_n64_n1:
+; GFX10-SDAG-LABEL: v_contract_mul_add_f32_select_64_1:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
-; GFX10-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0
+; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x42800000, vcc_lo
+; GFX10-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-GISEL-LABEL: v_mul_f32_select_n64_n1:
+; GFX10-GISEL-LABEL: v_contract_mul_add_f32_select_64_1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, -1.0, 0xc2800000, vcc_lo
-; GFX10-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX10-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: v_mul_f32_select_n64_n1:
+; GFX11-SDAG-LABEL: v_contract_mul_add_f32_select_64_1:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
-; GFX11-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0
+; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x42800000, vcc_lo
+; GFX11-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: v_mul_f32_select_n64_n1:
+; GFX11-GISEL-LABEL: v_contract_mul_add_f32_select_64_1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, -1.0, 0xc2800000, vcc_lo
-; GFX11-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX11-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
- %select.pow2 = select i1 %cond, float -64.0, float -1.0
- %mul = fmul float %x, %select.pow2
- ret float %mul
+ %select.pow2 = select contract i1 %cond, float 64.0, float 1.0
+ %mul = fmul contract float %x, %select.pow2
+ %fma = fadd contract float %mul, %y
+ ret float %fma
}
-define float @v_mul_f32_select_128_64(i32 %arg, float %x) {
-; GFX9-SDAG-LABEL: v_mul_f32_select_128_64:
+define float @v_contract_mul_add_f32_select_1_64(i32 %arg, float %x, float %y) {
+; GFX9-SDAG-LABEL: v_contract_mul_add_f32_select_1_64:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000
; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc
-; GFX9-SDAG-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, v3, 1.0, vcc
+; GFX9-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-GISEL-LABEL: v_mul_f32_select_128_64:
+; GFX9-GISEL-LABEL: v_contract_mul_add_f32_select_1_64:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x43000000
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX9-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-SDAG-LABEL: v_mul_f32_select_128_64:
+; GFX10-SDAG-LABEL: v_contract_mul_add_f32_select_1_64:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc_lo
-; GFX10-SDAG-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 0x42800000, 1.0, vcc_lo
+; GFX10-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-GISEL-LABEL: v_mul_f32_select_128_64:
+; GFX10-GISEL-LABEL: v_contract_mul_add_f32_select_1_64:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0x43000000, vcc_lo
-; GFX10-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX10-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: v_mul_f32_select_128_64:
+; GFX11-SDAG-LABEL: v_contract_mul_add_f32_select_1_64:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc_lo
-; GFX11-SDAG-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 0x42800000, 1.0, vcc_lo
+; GFX11-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: v_mul_f32_select_128_64:
+; GFX11-GISEL-LABEL: v_contract_mul_add_f32_select_1_64:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0x43000000, vcc_lo
-; GFX11-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX11-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
- %select.pow2 = select i1 %cond, float 128.0, float 64.0
- %mul = fmul float %x, %select.pow2
- ret float %mul
+ %select.pow2 = select contract i1 %cond, float 1.0, float 64.0
+ %mul = fmul contract float %x, %select.pow2
+ %fma = fadd contract float %mul, %y
+ ret float %fma
}
-define float @v_mul_f32_select_n128_n64(i32 %arg, float %x) {
-; GFX9-SDAG-LABEL: v_mul_f32_select_n128_n64:
+define float @v_contract_mul_add_f32_select_n64_n1(i32 %arg, float %x, float %y) {
+; GFX9-SDAG-LABEL: v_contract_mul_add_f32_select_n64_n1:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xc2800000
; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc
-; GFX9-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, -1.0, v3, vcc
+; GFX9-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-GISEL-LABEL: v_mul_f32_select_n128_n64:
+; GFX9-GISEL-LABEL: v_contract_mul_add_f32_select_n64_n1:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xc3000000
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2800000
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f32 v0, -v1, v0
+; GFX9-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-SDAG-LABEL: v_mul_f32_select_n128_n64:
+; GFX10-SDAG-LABEL: v_contract_mul_add_f32_select_n64_n1:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc_lo
-; GFX10-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0
+; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, -1.0, 0xc2800000, vcc_lo
+; GFX10-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-GISEL-LABEL: v_mul_f32_select_n128_n64:
+; GFX10-GISEL-LABEL: v_contract_mul_add_f32_select_n64_n1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2800000
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0xc3000000, vcc_lo
-; GFX10-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f32 v0, -v1, v0
+; GFX10-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: v_mul_f32_select_n128_n64:
+; GFX11-SDAG-LABEL: v_contract_mul_add_f32_select_n64_n1:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc_lo
-; GFX11-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0
+; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, -1.0, 0xc2800000, vcc_lo
+; GFX11-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: v_mul_f32_select_n128_n64:
+; GFX11-GISEL-LABEL: v_contract_mul_add_f32_select_n64_n1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2800000
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0xc3000000, vcc_lo
-; GFX11-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f32 v0, -v1, v0
+; GFX11-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
- %select.pow2 = select i1 %cond, float -128.0, float -64.0
- %mul = fmul float %x, %select.pow2
- ret float %mul
+ %select.pow2 = select contract i1 %cond, float -64.0, float -1.0
+ %mul = fmul contract float %x, %select.pow2
+ %fma = fadd contract float %mul, %y
+ ret float %fma
}
-define float @v_mul_f32_select_n128_n16(i32 %arg, float %x) {
-; GFX9-SDAG-LABEL: v_mul_f32_select_n128_n16:
+define float @v_contract_mul_add_f32_select_n1_n64(i32 %arg, float %x, float %y) {
+; GFX9-SDAG-LABEL: v_contract_mul_add_f32_select_n1_n64:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xc2800000
; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc
-; GFX9-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0
+; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, v3, -1.0, vcc
+; GFX9-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-GISEL-LABEL: v_mul_f32_select_n128_n16:
+; GFX9-GISEL-LABEL: v_contract_mul_add_f32_select_n1_n64:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xc3000000
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1800000
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f32 v0, -v1, v0
+; GFX9-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-SDAG-LABEL: v_mul_f32_select_n128_n16:
+; GFX10-SDAG-LABEL: v_contract_mul_add_f32_select_n1_n64:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc_lo
-; GFX10-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0
+; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 0xc2800000, -1.0, vcc_lo
+; GFX10-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-GISEL-LABEL: v_mul_f32_select_n128_n16:
+; GFX10-GISEL-LABEL: v_contract_mul_add_f32_select_n1_n64:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0xc1800000
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0xc3000000, vcc_lo
-; GFX10-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f32 v0, -v1, v0
+; GFX10-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: v_mul_f32_select_n128_n16:
+; GFX11-SDAG-LABEL: v_contract_mul_add_f32_select_n1_n64:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc_lo
-; GFX11-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0
+; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 0xc2800000, -1.0, vcc_lo
+; GFX11-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: v_mul_f32_select_n128_n16:
+; GFX11-GISEL-LABEL: v_contract_mul_add_f32_select_n1_n64:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0xc1800000
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0xc3000000, vcc_lo
-; GFX11-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f32 v0, -v1, v0
+; GFX11-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
- %cond = icmp eq i32 %arg, 0
- %select.pow2 = select i1 %cond, float -128.0, float -16.0
- %mul = fmul float %x, %select.pow2
- ret float %mul
-}
-
-define float @v_contract_mul_add_f32_select_64_1(i32 %arg, float %x, float %y) {
-; GFX9-LABEL: v_contract_mul_add_f32_select_64_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x42800000
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc
-; GFX9-NEXT: v_fma_f32 v0, v1, v0, v2
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: v_contract_mul_add_f32_select_64_1:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX1011-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x42800000, vcc_lo
-; GFX1011-NEXT: v_fma_f32 v0, v1, v0, v2
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
- %cond = icmp eq i32 %arg, 0
- %select.pow2 = select contract i1 %cond, float 64.0, float 1.0
- %mul = fmul contract float %x, %select.pow2
- %fma = fadd contract float %mul, %y
- ret float %fma
-}
-
-define float @v_contract_mul_add_f32_select_1_64(i32 %arg, float %x, float %y) {
-; GFX9-LABEL: v_contract_mul_add_f32_select_1_64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x42800000
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v3, 1.0, vcc
-; GFX9-NEXT: v_fma_f32 v0, v1, v0, v2
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: v_contract_mul_add_f32_select_1_64:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX1011-NEXT: v_cndmask_b32_e64 v0, 0x42800000, 1.0, vcc_lo
-; GFX1011-NEXT: v_fma_f32 v0, v1, v0, v2
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
- %cond = icmp eq i32 %arg, 0
- %select.pow2 = select contract i1 %cond, float 1.0, float 64.0
- %mul = fmul contract float %x, %select.pow2
- %fma = fadd contract float %mul, %y
- ret float %fma
-}
-
-define float @v_contract_mul_add_f32_select_n64_n1(i32 %arg, float %x, float %y) {
-; GFX9-LABEL: v_contract_mul_add_f32_select_n64_n1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, 0xc2800000
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, -1.0, v3, vcc
-; GFX9-NEXT: v_fma_f32 v0, v1, v0, v2
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: v_contract_mul_add_f32_select_n64_n1:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX1011-NEXT: v_cndmask_b32_e64 v0, -1.0, 0xc2800000, vcc_lo
-; GFX1011-NEXT: v_fma_f32 v0, v1, v0, v2
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
- %cond = icmp eq i32 %arg, 0
- %select.pow2 = select contract i1 %cond, float -64.0, float -1.0
- %mul = fmul contract float %x, %select.pow2
- %fma = fadd contract float %mul, %y
- ret float %fma
-}
-
-define float @v_contract_mul_add_f32_select_n1_n64(i32 %arg, float %x, float %y) {
-; GFX9-LABEL: v_contract_mul_add_f32_select_n1_n64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, 0xc2800000
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v3, -1.0, vcc
-; GFX9-NEXT: v_fma_f32 v0, v1, v0, v2
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: v_contract_mul_add_f32_select_n1_n64:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX1011-NEXT: v_cndmask_b32_e64 v0, 0xc2800000, -1.0, vcc_lo
-; GFX1011-NEXT: v_fma_f32 v0, v1, v0, v2
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
%select.pow2 = select contract i1 %cond, float -1.0, float -64.0
%mul = fmul contract float %x, %select.pow2
@@ -3810,11 +3780,11 @@ define float @v_contract_mul_add_f32_select_128_64(i32 %arg, float %x, float %y)
; GFX9-GISEL-LABEL: v_contract_mul_add_f32_select_128_64:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x43000000
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x42800000
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-GISEL-NEXT: v_fma_f32 v0, v1, v0, v2
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 6, v0
+; GFX9-GISEL-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX9-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_contract_mul_add_f32_select_128_64:
@@ -3829,10 +3799,11 @@ define float @v_contract_mul_add_f32_select_128_64(i32 %arg, float %x, float %y)
; GFX10-GISEL-LABEL: v_contract_mul_add_f32_select_128_64:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x43000000, vcc_lo
-; GFX10-GISEL-NEXT: v_fma_f32 v0, v1, v0, v2
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0
+; GFX10-GISEL-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX10-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_contract_mul_add_f32_select_128_64:
@@ -3847,10 +3818,11 @@ define float @v_contract_mul_add_f32_select_128_64(i32 %arg, float %x, float %y)
; GFX11-GISEL-LABEL: v_contract_mul_add_f32_select_128_64:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x43000000, vcc_lo
-; GFX11-GISEL-NEXT: v_fma_f32 v0, v1, v0, v2
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0
+; GFX11-GISEL-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX11-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
%select.pow2 = select i1 %cond, float 128.0, float 64.0
@@ -3860,22 +3832,57 @@ define float @v_contract_mul_add_f32_select_128_64(i32 %arg, float %x, float %y)
}
define float @v_contract_mul_add_f32_select_128_4(i32 %arg, float %x, float %y) {
-; GFX9-LABEL: v_contract_mul_add_f32_select_128_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x43000000
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-NEXT: v_cndmask_b32_e32 v0, 4.0, v3, vcc
-; GFX9-NEXT: v_fma_f32 v0, v1, v0, v2
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-LABEL: v_contract_mul_add_f32_select_128_4:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x43000000
+; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, 4.0, v3, vcc
+; GFX9-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1011-LABEL: v_contract_mul_add_f32_select_128_4:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX1011-NEXT: v_cndmask_b32_e64 v0, 4.0, 0x43000000, vcc_lo
-; GFX1011-NEXT: v_fma_f32 v0, v1, v0, v2
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-LABEL: v_contract_mul_add_f32_select_128_4:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 2, 7, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX9-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_contract_mul_add_f32_select_128_4:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 4.0, 0x43000000, vcc_lo
+; GFX10-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_contract_mul_add_f32_select_128_4:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 2, 7, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX10-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_contract_mul_add_f32_select_128_4:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 4.0, 0x43000000, vcc_lo
+; GFX11-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_contract_mul_add_f32_select_128_4:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 2, 7, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX11-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
%select.pow2 = select i1 %cond, float 128.0, float 4.0
%mul = fmul contract float %x, %select.pow2
@@ -3907,203 +3914,124 @@ define float @v_contract_mul_add_f32_select_2_4(i32 %arg, float %x, float %y) {
}
define float @v_contract_mul_add_f32_select_4_128(i32 %arg, float %x, float %y) {
-; GFX9-LABEL: v_contract_mul_add_f32_select_4_128:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x43000000
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v3, 4.0, vcc
-; GFX9-NEXT: v_fma_f32 v0, v1, v0, v2
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1011-LABEL: v_contract_mul_add_f32_select_4_128:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX1011-NEXT: v_cndmask_b32_e64 v0, 0x43000000, 4.0, vcc_lo
-; GFX1011-NEXT: v_fma_f32 v0, v1, v0, v2
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
- %cond = icmp eq i32 %arg, 0
- %select.pow2 = select i1 %cond, float 4.0, float 128.0
- %mul = fmul contract float %x, %select.pow2
- %fma = fadd contract float %mul, %y
- ret float %fma
-}
-
-define double @v_mul_f64_select_64_1(i32 %arg, double %x) {
-; GFX9-SDAG-LABEL: v_mul_f64_select_64_1:
+; GFX9-SDAG-LABEL: v_contract_mul_add_f32_select_4_128:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x43000000
; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc
-; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0
+; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, v3, 4.0, vcc
+; GFX9-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-GISEL-LABEL: v_mul_f64_select_64_1:
+; GFX9-GISEL-LABEL: v_contract_mul_add_f32_select_4_128:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x40500000
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x3ff00000
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 7, 2, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX9-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-SDAG-LABEL: v_mul_f64_select_64_1:
+; GFX10-SDAG-LABEL: v_contract_mul_add_f32_select_4_128:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
-; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0
+; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 0x43000000, 4.0, vcc_lo
+; GFX10-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-GISEL-LABEL: v_mul_f64_select_64_1:
+; GFX10-GISEL-LABEL: v_contract_mul_add_f32_select_4_128:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0x3ff00000
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0x40500000, vcc_lo
-; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 7, 2, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX10-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: v_mul_f64_select_64_1:
+; GFX11-SDAG-LABEL: v_contract_mul_add_f32_select_4_128:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
-; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0
+; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 0x43000000, 4.0, vcc_lo
+; GFX11-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: v_mul_f64_select_64_1:
+; GFX11-GISEL-LABEL: v_contract_mul_add_f32_select_4_128:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v4, 0x3ff00000 :: v_dual_mov_b32 v3, 0
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0x40500000, vcc_lo
-; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 7, 2, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX11-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
- %select.pow2 = select i1 %cond, double 64.0, double 1.0
- %mul = fmul double %x, %select.pow2
- ret double %mul
+ %select.pow2 = select i1 %cond, float 4.0, float 128.0
+ %mul = fmul contract float %x, %select.pow2
+ %fma = fadd contract float %mul, %y
+ ret float %fma
}
-define double @v_mul_f64_select_1_64(i32 %arg, double %x) {
-; GFX9-SDAG-LABEL: v_mul_f64_select_1_64:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc
-; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_mul_f64_select_1_64:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x3ff00000
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x40500000
-; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: v_mul_f64_select_1_64:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
-; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: v_mul_f64_select_1_64:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0x40500000
-; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0x3ff00000, vcc_lo
-; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SDAG-LABEL: v_mul_f64_select_1_64:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
-; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+define double @v_mul_f64_select_64_1(i32 %arg, double %x) {
+; GFX9-LABEL: v_mul_f64_select_64_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc
+; GFX9-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: v_mul_f64_select_1_64:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v4, 0x40500000 :: v_dual_mov_b32 v3, 0
-; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0x3ff00000, vcc_lo
-; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX1011-LABEL: v_mul_f64_select_64_1:
+; GFX1011: ; %bb.0:
+; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1011-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
+; GFX1011-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0
+; GFX1011-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
- %select.pow2 = select i1 %cond, double 1.0, double 64.0
+ %select.pow2 = select i1 %cond, double 64.0, double 1.0
%mul = fmul double %x, %select.pow2
ret double %mul
-}
-
-define double @v_mul_f64_select_n1_n64(i32 %arg, double %x) {
-; GFX9-SDAG-LABEL: v_mul_f64_select_n1_n64:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc
-; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_mul_f64_select_n1_n64:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xbff00000
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0xc0500000
-; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: v_mul_f64_select_n1_n64:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
-; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: v_mul_f64_select_n1_n64:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0xc0500000
-; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0xbff00000, vcc_lo
-; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+}
+
+define double @v_mul_f64_select_1_64(i32 %arg, double %x) {
+; GFX9-LABEL: v_mul_f64_select_1_64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc
+; GFX9-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: v_mul_f64_select_n1_n64:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
-; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX1011-LABEL: v_mul_f64_select_1_64:
+; GFX1011: ; %bb.0:
+; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1011-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
+; GFX1011-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0
+; GFX1011-NEXT: s_setpc_b64 s[30:31]
+ %cond = icmp eq i32 %arg, 0
+ %select.pow2 = select i1 %cond, double 1.0, double 64.0
+ %mul = fmul double %x, %select.pow2
+ ret double %mul
+}
+
+define double @v_mul_f64_select_n1_n64(i32 %arg, double %x) {
+; GFX9-LABEL: v_mul_f64_select_n1_n64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc
+; GFX9-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: v_mul_f64_select_n1_n64:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v4, 0xc0500000 :: v_dual_mov_b32 v3, 0
-; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0xbff00000, vcc_lo
-; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX1011-LABEL: v_mul_f64_select_n1_n64:
+; GFX1011: ; %bb.0:
+; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1011-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
+; GFX1011-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0
+; GFX1011-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
%select.pow2 = select i1 %cond, double -1.0, double -64.0
%mul = fmul double %x, %select.pow2
@@ -4122,12 +4050,10 @@ define double @v_mul_f64_select_128_64(i32 %arg, double %x) {
; GFX9-GISEL-LABEL: v_mul_f64_select_128_64:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x40600000
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x40500000
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 6, v0
+; GFX9-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_mul_f64_select_128_64:
@@ -4141,11 +4067,10 @@ define double @v_mul_f64_select_128_64(i32 %arg, double %x) {
; GFX10-GISEL-LABEL: v_mul_f64_select_128_64:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0x40500000
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0x40600000, vcc_lo
-; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0
+; GFX10-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_mul_f64_select_128_64:
@@ -4159,10 +4084,10 @@ define double @v_mul_f64_select_128_64(i32 %arg, double %x) {
; GFX11-GISEL-LABEL: v_mul_f64_select_128_64:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v4, 0x40500000 :: v_dual_mov_b32 v3, 0
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0x40600000, vcc_lo
-; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0
+; GFX11-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
%select.pow2 = select i1 %cond, double 128.0, double 64.0
@@ -4182,12 +4107,10 @@ define double @v_mul_f64_select_n128_n64(i32 %arg, double %x) {
; GFX9-GISEL-LABEL: v_mul_f64_select_n128_n64:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xc0600000
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0xc0500000
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 6, v0
+; GFX9-GISEL-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_mul_f64_select_n128_n64:
@@ -4201,11 +4124,10 @@ define double @v_mul_f64_select_n128_n64(i32 %arg, double %x) {
; GFX10-GISEL-LABEL: v_mul_f64_select_n128_n64:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0xc0500000
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0xc0600000, vcc_lo
-; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0
+; GFX10-GISEL-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_mul_f64_select_n128_n64:
@@ -4219,10 +4141,10 @@ define double @v_mul_f64_select_n128_n64(i32 %arg, double %x) {
; GFX11-GISEL-LABEL: v_mul_f64_select_n128_n64:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v4, 0xc0500000 :: v_dual_mov_b32 v3, 0
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0xc0600000, vcc_lo
-; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0
+; GFX11-GISEL-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
%select.pow2 = select i1 %cond, double -128.0, double -64.0
@@ -4231,59 +4153,21 @@ define double @v_mul_f64_select_n128_n64(i32 %arg, double %x) {
}
define double @v_mul_f64_select_n128_n16(i32 %arg, double %x) {
-; GFX9-SDAG-LABEL: v_mul_f64_select_n128_n16:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc
-; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_mul_f64_select_n128_n16:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xc0600000
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0xc0300000
-; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
-; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: v_mul_f64_select_n128_n16:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc_lo
-; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: v_mul_f64_select_n128_n16:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0xc0300000
-; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0xc0600000, vcc_lo
-; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SDAG-LABEL: v_mul_f64_select_n128_n16:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc_lo
-; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: v_mul_f64_select_n128_n16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc
+; GFX9-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: v_mul_f64_select_n128_n16:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v4, 0xc0300000 :: v_dual_mov_b32 v3, 0
-; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0xc0600000, vcc_lo
-; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4]
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX1011-LABEL: v_mul_f64_select_n128_n16:
+; GFX1011: ; %bb.0:
+; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1011-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc_lo
+; GFX1011-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0
+; GFX1011-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
%select.pow2 = select i1 %cond, double -128.0, double -16.0
%mul = fmul double %x, %select.pow2
@@ -4305,12 +4189,10 @@ define double @v_contract_mul_add_f64_select_64_1(i32 %arg, double %x, double %y
; GFX9-GISEL-LABEL: v_contract_mul_add_f64_select_64_1:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0x40500000
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v7, 0x3ff00000
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0
+; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4]
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_contract_mul_add_f64_select_64_1:
@@ -4326,11 +4208,10 @@ define double @v_contract_mul_add_f64_select_64_1(i32 %arg, double %x, double %y
; GFX10-GISEL-LABEL: v_contract_mul_add_f64_select_64_1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v6, 0x3ff00000
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v5, 0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0x40500000, vcc_lo
-; GFX10-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0
+; GFX10-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4]
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_contract_mul_add_f64_select_64_1:
@@ -4345,10 +4226,10 @@ define double @v_contract_mul_add_f64_select_64_1(i32 %arg, double %x, double %y
; GFX11-GISEL-LABEL: v_contract_mul_add_f64_select_64_1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v6, 0x3ff00000 :: v_dual_mov_b32 v5, 0
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0x40500000, vcc_lo
-; GFX11-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0
+; GFX11-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
%select.pow2 = select contract i1 %cond, double 64.0, double 1.0
@@ -4372,12 +4253,10 @@ define double @v_contract_mul_add_f64_select_1_64(i32 %arg, double %x, double %y
; GFX9-GISEL-LABEL: v_contract_mul_add_f64_select_1_64:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0x3ff00000
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v7, 0x40500000
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0
+; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4]
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_contract_mul_add_f64_select_1_64:
@@ -4393,11 +4272,10 @@ define double @v_contract_mul_add_f64_select_1_64(i32 %arg, double %x, double %y
; GFX10-GISEL-LABEL: v_contract_mul_add_f64_select_1_64:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v6, 0x40500000
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v5, 0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0x3ff00000, vcc_lo
-; GFX10-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0
+; GFX10-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4]
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_contract_mul_add_f64_select_1_64:
@@ -4412,10 +4290,10 @@ define double @v_contract_mul_add_f64_select_1_64(i32 %arg, double %x, double %y
; GFX11-GISEL-LABEL: v_contract_mul_add_f64_select_1_64:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v6, 0x40500000 :: v_dual_mov_b32 v5, 0
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0x3ff00000, vcc_lo
-; GFX11-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0
+; GFX11-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
%select.pow2 = select contract i1 %cond, double 1.0, double 64.0
@@ -4439,12 +4317,10 @@ define double @v_contract_mul_add_f64_select_n64_n1(i32 %arg, double %x, double
; GFX9-GISEL-LABEL: v_contract_mul_add_f64_select_n64_n1:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0xc0500000
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v7, 0xbff00000
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0
+; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4]
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_contract_mul_add_f64_select_n64_n1:
@@ -4460,11 +4336,10 @@ define double @v_contract_mul_add_f64_select_n64_n1(i32 %arg, double %x, double
; GFX10-GISEL-LABEL: v_contract_mul_add_f64_select_n64_n1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v6, 0xbff00000
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v5, 0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0xc0500000, vcc_lo
-; GFX10-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0
+; GFX10-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4]
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_contract_mul_add_f64_select_n64_n1:
@@ -4479,10 +4354,10 @@ define double @v_contract_mul_add_f64_select_n64_n1(i32 %arg, double %x, double
; GFX11-GISEL-LABEL: v_contract_mul_add_f64_select_n64_n1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v6, 0xbff00000 :: v_dual_mov_b32 v5, 0
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0xc0500000, vcc_lo
-; GFX11-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0
+; GFX11-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
%select.pow2 = select contract i1 %cond, double -64.0, double -1.0
@@ -4506,12 +4381,10 @@ define double @v_contract_mul_add_f64_select_n1_n64(i32 %arg, double %x, double
; GFX9-GISEL-LABEL: v_contract_mul_add_f64_select_n1_n64:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0xbff00000
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v7, 0xc0500000
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0
+; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4]
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_contract_mul_add_f64_select_n1_n64:
@@ -4527,11 +4400,10 @@ define double @v_contract_mul_add_f64_select_n1_n64(i32 %arg, double %x, double
; GFX10-GISEL-LABEL: v_contract_mul_add_f64_select_n1_n64:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v6, 0xc0500000
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v5, 0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0xbff00000, vcc_lo
-; GFX10-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0
+; GFX10-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4]
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_contract_mul_add_f64_select_n1_n64:
@@ -4546,10 +4418,10 @@ define double @v_contract_mul_add_f64_select_n1_n64(i32 %arg, double %x, double
; GFX11-GISEL-LABEL: v_contract_mul_add_f64_select_n1_n64:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v6, 0xc0500000 :: v_dual_mov_b32 v5, 0
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0xbff00000, vcc_lo
-; GFX11-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0
+; GFX11-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
%select.pow2 = select contract i1 %cond, double -1.0, double -64.0
@@ -4573,12 +4445,11 @@ define double @v_contract_mul_add_f64_select_128_64(i32 %arg, double %x, double
; GFX9-GISEL-LABEL: v_contract_mul_add_f64_select_128_64:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0x40600000
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v7, 0x40500000
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 6, v0
+; GFX9-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0
+; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4]
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_contract_mul_add_f64_select_128_64:
@@ -4594,11 +4465,11 @@ define double @v_contract_mul_add_f64_select_128_64(i32 %arg, double %x, double
; GFX10-GISEL-LABEL: v_contract_mul_add_f64_select_128_64:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v6, 0x40500000
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v5, 0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0x40600000, vcc_lo
-; GFX10-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0
+; GFX10-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0
+; GFX10-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4]
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_contract_mul_add_f64_select_128_64:
@@ -4613,10 +4484,11 @@ define double @v_contract_mul_add_f64_select_128_64(i32 %arg, double %x, double
; GFX11-GISEL-LABEL: v_contract_mul_add_f64_select_128_64:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v6, 0x40500000 :: v_dual_mov_b32 v5, 0
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0x40600000, vcc_lo
-; GFX11-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0
+; GFX11-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0
+; GFX11-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
%select.pow2 = select i1 %cond, double 128.0, double 64.0
@@ -4640,12 +4512,10 @@ define double @v_contract_mul_add_f64_select_128_4(i32 %arg, double %x, double %
; GFX9-GISEL-LABEL: v_contract_mul_add_f64_select_128_4:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0x40600000
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v7, 0x40100000
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 2, 7, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0
+; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4]
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_contract_mul_add_f64_select_128_4:
@@ -4661,11 +4531,10 @@ define double @v_contract_mul_add_f64_select_128_4(i32 %arg, double %x, double %
; GFX10-GISEL-LABEL: v_contract_mul_add_f64_select_128_4:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v6, 0x40100000
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v5, 0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0x40600000, vcc_lo
-; GFX10-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 2, 7, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0
+; GFX10-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4]
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_contract_mul_add_f64_select_128_4:
@@ -4680,10 +4549,10 @@ define double @v_contract_mul_add_f64_select_128_4(i32 %arg, double %x, double %
; GFX11-GISEL-LABEL: v_contract_mul_add_f64_select_128_4:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v6, 0x40100000 :: v_dual_mov_b32 v5, 0
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0x40600000, vcc_lo
-; GFX11-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 2, 7, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0
+; GFX11-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
%select.pow2 = select i1 %cond, double 128.0, double 4.0
@@ -4706,21 +4575,50 @@ define double @v_contract_mul_add_f64_select_2_4(i32 %arg, double %x, double %y)
; GFX9-GISEL-LABEL: v_contract_mul_add_f64_select_2_4:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0x40100000
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 2.0, vcc
-; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
+; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0
+; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4]
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1011-LABEL: v_contract_mul_add_f64_select_2_4:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX1011-NEXT: v_mov_b32_e32 v5, 0
-; GFX1011-NEXT: v_cndmask_b32_e64 v6, 0x40100000, 2.0, vcc_lo
-; GFX1011-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: v_contract_mul_add_f64_select_2_4:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX10-SDAG-NEXT: v_mov_b32_e32 v5, 0
+; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v6, 0x40100000, 2.0, vcc_lo
+; GFX10-SDAG-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_contract_mul_add_f64_select_2_4:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 2, v0
+; GFX10-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0
+; GFX10-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4]
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_contract_mul_add_f64_select_2_4:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v5, 0
+; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v6, 0x40100000, 2.0, vcc_lo
+; GFX11-SDAG-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_contract_mul_add_f64_select_2_4:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0
+; GFX11-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4]
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
%select.pow2 = select i1 %cond, double 2.0, double 4.0
%mul = fmul contract double %x, %select.pow2
@@ -4743,12 +4641,10 @@ define double @v_contract_mul_add_f64_select_4_128(i32 %arg, double %x, double %
; GFX9-GISEL-LABEL: v_contract_mul_add_f64_select_4_128:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0x40100000
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v7, 0x40600000
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 7, 2, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0
+; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4]
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_contract_mul_add_f64_select_4_128:
@@ -4764,11 +4660,10 @@ define double @v_contract_mul_add_f64_select_4_128(i32 %arg, double %x, double %
; GFX10-GISEL-LABEL: v_contract_mul_add_f64_select_4_128:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v6, 0x40600000
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v5, 0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0x40100000, vcc_lo
-; GFX10-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 7, 2, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0
+; GFX10-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4]
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_contract_mul_add_f64_select_4_128:
@@ -4783,10 +4678,10 @@ define double @v_contract_mul_add_f64_select_4_128(i32 %arg, double %x, double %
; GFX11-GISEL-LABEL: v_contract_mul_add_f64_select_4_128:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v6, 0x40600000 :: v_dual_mov_b32 v5, 0
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0x40100000, vcc_lo
-; GFX11-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4]
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 7, 2, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0
+; GFX11-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4]
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
%select.pow2 = select i1 %cond, double 4.0, double 128.0
@@ -4796,57 +4691,21 @@ define double @v_contract_mul_add_f64_select_4_128(i32 %arg, double %x, double %
}
define half @v_mul_f16_select_64_1(i32 %arg, half %x) {
-; GFX9-SDAG-LABEL: v_mul_f16_select_64_1:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc
-; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v1, v0
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_mul_f16_select_64_1:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x5400
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x3c00
-; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: v_mul_f16_select_64_1:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
-; GFX10-SDAG-NEXT: v_ldexp_f16_e32 v0, v1, v0
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: v_mul_f16_select_64_1:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x3c00
-; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0x5400, vcc_lo
-; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SDAG-LABEL: v_mul_f16_select_64_1:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
-; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v1, v0
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: v_mul_f16_select_64_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc
+; GFX9-NEXT: v_ldexp_f16_e32 v0, v1, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: v_mul_f16_select_64_1:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x3c00
-; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0x5400, vcc_lo
-; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX1011-LABEL: v_mul_f16_select_64_1:
+; GFX1011: ; %bb.0:
+; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1011-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
+; GFX1011-NEXT: v_ldexp_f16_e32 v0, v1, v0
+; GFX1011-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
%select.pow2 = select i1 %cond, half 64.0, half 1.0
%mul = fmul half %x, %select.pow2
@@ -4854,57 +4713,21 @@ define half @v_mul_f16_select_64_1(i32 %arg, half %x) {
}
define half @v_mul_f16_select_1_64(i32 %arg, half %x) {
-; GFX9-SDAG-LABEL: v_mul_f16_select_1_64:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc
-; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v1, v0
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_mul_f16_select_1_64:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x3c00
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400
-; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: v_mul_f16_select_1_64:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
-; GFX10-SDAG-NEXT: v_ldexp_f16_e32 v0, v1, v0
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: v_mul_f16_select_1_64:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x5400
-; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0x3c00, vcc_lo
-; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SDAG-LABEL: v_mul_f16_select_1_64:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
-; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v1, v0
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: v_mul_f16_select_1_64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc
+; GFX9-NEXT: v_ldexp_f16_e32 v0, v1, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: v_mul_f16_select_1_64:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x5400
-; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0x3c00, vcc_lo
-; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX1011-LABEL: v_mul_f16_select_1_64:
+; GFX1011: ; %bb.0:
+; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1011-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
+; GFX1011-NEXT: v_ldexp_f16_e32 v0, v1, v0
+; GFX1011-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
%select.pow2 = select i1 %cond, half 1.0, half 64.0
%mul = fmul half %x, %select.pow2
@@ -4912,57 +4735,21 @@ define half @v_mul_f16_select_1_64(i32 %arg, half %x) {
}
define half @v_mul_f16_select_n1_n64(i32 %arg, half %x) {
-; GFX9-SDAG-LABEL: v_mul_f16_select_n1_n64:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc
-; GFX9-SDAG-NEXT: v_ldexp_f16_e64 v0, -v1, v0
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_mul_f16_select_n1_n64:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xbc00
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xd400
-; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: v_mul_f16_select_n1_n64:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
-; GFX10-SDAG-NEXT: v_ldexp_f16_e64 v0, -v1, v0
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: v_mul_f16_select_n1_n64:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0xd400
-; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0xbc00, vcc_lo
-; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SDAG-LABEL: v_mul_f16_select_n1_n64:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
-; GFX11-SDAG-NEXT: v_ldexp_f16_e64 v0, -v1, v0
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: v_mul_f16_select_n1_n64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc
+; GFX9-NEXT: v_ldexp_f16_e64 v0, -v1, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: v_mul_f16_select_n1_n64:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0xd400
-; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0xbc00, vcc_lo
-; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX1011-LABEL: v_mul_f16_select_n1_n64:
+; GFX1011: ; %bb.0:
+; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1011-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
+; GFX1011-NEXT: v_ldexp_f16_e64 v0, -v1, v0
+; GFX1011-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
%select.pow2 = select i1 %cond, half -1.0, half -64.0
%mul = fmul half %x, %select.pow2
@@ -4981,11 +4768,13 @@ define half @v_mul_f16_select_128_64(i32 %arg, half %x) {
; GFX9-GISEL-LABEL: v_mul_f16_select_128_64:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x5800
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 6, v0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff8000
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
+; GFX9-GISEL-NEXT: v_med3_i32 v0, v0, v2, v3
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_mul_f16_select_128_64:
@@ -4999,10 +4788,12 @@ define half @v_mul_f16_select_128_64(i32 %arg, half %x) {
; GFX10-GISEL-LABEL: v_mul_f16_select_128_64:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x5400
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0x5800, vcc_lo
-; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0
+; GFX10-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v0, v2
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_mul_f16_select_128_64:
@@ -5016,10 +4807,12 @@ define half @v_mul_f16_select_128_64(i32 %arg, half %x) {
; GFX11-GISEL-LABEL: v_mul_f16_select_128_64:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x5400
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0x5800, vcc_lo
-; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0
+; GFX11-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v0, v2
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
%select.pow2 = select i1 %cond, half 128.0, half 64.0
@@ -5039,11 +4832,13 @@ define half @v_mul_f16_select_n128_n64(i32 %arg, half %x) {
; GFX9-GISEL-LABEL: v_mul_f16_select_n128_n64:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xd800
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xd400
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 6, v0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff8000
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
+; GFX9-GISEL-NEXT: v_med3_i32 v0, v0, v2, v3
+; GFX9-GISEL-NEXT: v_ldexp_f16_e64 v0, -v1, v0
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_mul_f16_select_n128_n64:
@@ -5057,10 +4852,12 @@ define half @v_mul_f16_select_n128_n64(i32 %arg, half %x) {
; GFX10-GISEL-LABEL: v_mul_f16_select_n128_n64:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0xd400
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0xd800, vcc_lo
-; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0
+; GFX10-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v0, v2
+; GFX10-GISEL-NEXT: v_ldexp_f16_e64 v0, -v1, v0
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_mul_f16_select_n128_n64:
@@ -5074,10 +4871,12 @@ define half @v_mul_f16_select_n128_n64(i32 %arg, half %x) {
; GFX11-GISEL-LABEL: v_mul_f16_select_n128_n64:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0xd400
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0xd800, vcc_lo
-; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0
+; GFX11-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v0, v2
+; GFX11-GISEL-NEXT: v_ldexp_f16_e64 v0, -v1, v0
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
%select.pow2 = select i1 %cond, half -128.0, half -64.0
@@ -5086,57 +4885,21 @@ define half @v_mul_f16_select_n128_n64(i32 %arg, half %x) {
}
define half @v_mul_f16_select_n128_n16(i32 %arg, half %x) {
-; GFX9-SDAG-LABEL: v_mul_f16_select_n128_n16:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc
-; GFX9-SDAG-NEXT: v_ldexp_f16_e64 v0, -v1, v0
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_mul_f16_select_n128_n16:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xd800
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xcc00
-; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
-; GFX9-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: v_mul_f16_select_n128_n16:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc_lo
-; GFX10-SDAG-NEXT: v_ldexp_f16_e64 v0, -v1, v0
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: v_mul_f16_select_n128_n16:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0xcc00
-; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0xd800, vcc_lo
-; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SDAG-LABEL: v_mul_f16_select_n128_n16:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc_lo
-; GFX11-SDAG-NEXT: v_ldexp_f16_e64 v0, -v1, v0
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: v_mul_f16_select_n128_n16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc
+; GFX9-NEXT: v_ldexp_f16_e64 v0, -v1, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: v_mul_f16_select_n128_n16:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0xcc00
-; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0xd800, vcc_lo
-; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX1011-LABEL: v_mul_f16_select_n128_n16:
+; GFX1011: ; %bb.0:
+; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1011-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc_lo
+; GFX1011-NEXT: v_ldexp_f16_e64 v0, -v1, v0
+; GFX1011-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
%select.pow2 = select i1 %cond, half -128.0, half -16.0
%mul = fmul half %x, %select.pow2
@@ -5157,11 +4920,10 @@ define half @v_contract_mul_add_f16_select_64_1(i32 %arg, half %x, half %y) {
; GFX9-GISEL-LABEL: v_contract_mul_add_f16_select_64_1:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x3c00
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0
+; GFX9-GISEL-NEXT: v_add_f16_e32 v0, v0, v2
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_contract_mul_add_f16_select_64_1:
@@ -5176,10 +4938,10 @@ define half @v_contract_mul_add_f16_select_64_1(i32 %arg, half %x, half %y) {
; GFX10-GISEL-LABEL: v_contract_mul_add_f16_select_64_1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x3c00
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x5400, vcc_lo
-; GFX10-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0
+; GFX10-GISEL-NEXT: v_add_f16_e32 v0, v0, v2
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_contract_mul_add_f16_select_64_1:
@@ -5194,10 +4956,10 @@ define half @v_contract_mul_add_f16_select_64_1(i32 %arg, half %x, half %y) {
; GFX11-GISEL-LABEL: v_contract_mul_add_f16_select_64_1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x3c00
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x5400, vcc_lo
-; GFX11-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0
+; GFX11-GISEL-NEXT: v_add_f16_e32 v0, v0, v2
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
%select.pow2 = select contract i1 %cond, half 64.0, half 1.0
@@ -5220,11 +4982,10 @@ define half @v_contract_mul_add_f16_select_1_64(i32 %arg, half %x, half %y) {
; GFX9-GISEL-LABEL: v_contract_mul_add_f16_select_1_64:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x3c00
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x5400
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0
+; GFX9-GISEL-NEXT: v_add_f16_e32 v0, v0, v2
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_contract_mul_add_f16_select_1_64:
@@ -5239,10 +5000,10 @@ define half @v_contract_mul_add_f16_select_1_64(i32 %arg, half %x, half %y) {
; GFX10-GISEL-LABEL: v_contract_mul_add_f16_select_1_64:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x3c00, vcc_lo
-; GFX10-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0
+; GFX10-GISEL-NEXT: v_add_f16_e32 v0, v0, v2
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_contract_mul_add_f16_select_1_64:
@@ -5257,10 +5018,10 @@ define half @v_contract_mul_add_f16_select_1_64(i32 %arg, half %x, half %y) {
; GFX11-GISEL-LABEL: v_contract_mul_add_f16_select_1_64:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x3c00, vcc_lo
-; GFX11-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0
+; GFX11-GISEL-NEXT: v_add_f16_e32 v0, v0, v2
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
%select.pow2 = select contract i1 %cond, half 1.0, half 64.0
@@ -5283,11 +5044,10 @@ define half @v_contract_mul_add_f16_select_n64_n1(i32 %arg, half %x, half %y) {
; GFX9-GISEL-LABEL: v_contract_mul_add_f16_select_n64_n1:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xd400
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xbc00
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f16_e64 v0, -v1, v0
+; GFX9-GISEL-NEXT: v_add_f16_e32 v0, v0, v2
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_contract_mul_add_f16_select_n64_n1:
@@ -5302,10 +5062,10 @@ define half @v_contract_mul_add_f16_select_n64_n1(i32 %arg, half %x, half %y) {
; GFX10-GISEL-LABEL: v_contract_mul_add_f16_select_n64_n1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0xbc00
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0xd400, vcc_lo
-; GFX10-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f16_e64 v0, -v1, v0
+; GFX10-GISEL-NEXT: v_add_f16_e32 v0, v0, v2
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_contract_mul_add_f16_select_n64_n1:
@@ -5320,10 +5080,10 @@ define half @v_contract_mul_add_f16_select_n64_n1(i32 %arg, half %x, half %y) {
; GFX11-GISEL-LABEL: v_contract_mul_add_f16_select_n64_n1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0xbc00
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0xd400, vcc_lo
-; GFX11-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f16_e64 v0, -v1, v0
+; GFX11-GISEL-NEXT: v_add_f16_e32 v0, v0, v2
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
%select.pow2 = select contract i1 %cond, half -64.0, half -1.0
@@ -5346,11 +5106,10 @@ define half @v_contract_mul_add_f16_select_n1_n64(i32 %arg, half %x, half %y) {
; GFX9-GISEL-LABEL: v_contract_mul_add_f16_select_n1_n64:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xbc00
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xd400
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f16_e64 v0, -v1, v0
+; GFX9-GISEL-NEXT: v_add_f16_e32 v0, v0, v2
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_contract_mul_add_f16_select_n1_n64:
@@ -5365,10 +5124,10 @@ define half @v_contract_mul_add_f16_select_n1_n64(i32 %arg, half %x, half %y) {
; GFX10-GISEL-LABEL: v_contract_mul_add_f16_select_n1_n64:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0xd400
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0xbc00, vcc_lo
-; GFX10-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f16_e64 v0, -v1, v0
+; GFX10-GISEL-NEXT: v_add_f16_e32 v0, v0, v2
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_contract_mul_add_f16_select_n1_n64:
@@ -5383,10 +5142,10 @@ define half @v_contract_mul_add_f16_select_n1_n64(i32 %arg, half %x, half %y) {
; GFX11-GISEL-LABEL: v_contract_mul_add_f16_select_n1_n64:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0xd400
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0xbc00, vcc_lo
-; GFX11-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f16_e64 v0, -v1, v0
+; GFX11-GISEL-NEXT: v_add_f16_e32 v0, v0, v2
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
%select.pow2 = select contract i1 %cond, half -1.0, half -64.0
@@ -5409,11 +5168,14 @@ define half @v_contract_mul_add_f16_select_128_64(i32 %arg, half %x, half %y) {
; GFX9-GISEL-LABEL: v_contract_mul_add_f16_select_128_64:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x5800
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x5400
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 6, v0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xffff8000
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x7fff
+; GFX9-GISEL-NEXT: v_med3_i32 v0, v0, v3, v4
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0
+; GFX9-GISEL-NEXT: v_add_f16_e32 v0, v0, v2
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_contract_mul_add_f16_select_128_64:
@@ -5428,10 +5190,13 @@ define half @v_contract_mul_add_f16_select_128_64(i32 %arg, half %x, half %y) {
; GFX10-GISEL-LABEL: v_contract_mul_add_f16_select_128_64:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x5800, vcc_lo
-; GFX10-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0
+; GFX10-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v0, v3
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0
+; GFX10-GISEL-NEXT: v_add_f16_e32 v0, v0, v2
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_contract_mul_add_f16_select_128_64:
@@ -5446,10 +5211,13 @@ define half @v_contract_mul_add_f16_select_128_64(i32 %arg, half %x, half %y) {
; GFX11-GISEL-LABEL: v_contract_mul_add_f16_select_128_64:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x5800, vcc_lo
-; GFX11-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0
+; GFX11-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v0, v3
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0
+; GFX11-GISEL-NEXT: v_add_f16_e32 v0, v0, v2
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
%select.pow2 = select i1 %cond, half 128.0, half 64.0
@@ -5472,11 +5240,10 @@ define half @v_contract_mul_add_f16_select_128_4(i32 %arg, half %x, half %y) {
; GFX9-GISEL-LABEL: v_contract_mul_add_f16_select_128_4:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x5800
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x4400
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 2, 7, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0
+; GFX9-GISEL-NEXT: v_add_f16_e32 v0, v0, v2
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_contract_mul_add_f16_select_128_4:
@@ -5491,10 +5258,10 @@ define half @v_contract_mul_add_f16_select_128_4(i32 %arg, half %x, half %y) {
; GFX10-GISEL-LABEL: v_contract_mul_add_f16_select_128_4:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x4400
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x5800, vcc_lo
-; GFX10-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 2, 7, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0
+; GFX10-GISEL-NEXT: v_add_f16_e32 v0, v0, v2
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_contract_mul_add_f16_select_128_4:
@@ -5509,10 +5276,10 @@ define half @v_contract_mul_add_f16_select_128_4(i32 %arg, half %x, half %y) {
; GFX11-GISEL-LABEL: v_contract_mul_add_f16_select_128_4:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x4400
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x5800, vcc_lo
-; GFX11-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 2, 7, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0
+; GFX11-GISEL-NEXT: v_add_f16_e32 v0, v0, v2
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
%select.pow2 = select i1 %cond, half 128.0, half 4.0
@@ -5535,11 +5302,14 @@ define half @v_contract_mul_add_f16_select_2_4(i32 %arg, half %x, half %y) {
; GFX9-GISEL-LABEL: v_contract_mul_add_f16_select_2_4:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x4000
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x4400
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
+; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xffff8000
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x7fff
+; GFX9-GISEL-NEXT: v_med3_i32 v0, v0, v3, v4
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0
+; GFX9-GISEL-NEXT: v_add_f16_e32 v0, v0, v2
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_contract_mul_add_f16_select_2_4:
@@ -5554,10 +5324,13 @@ define half @v_contract_mul_add_f16_select_2_4(i32 %arg, half %x, half %y) {
; GFX10-GISEL-LABEL: v_contract_mul_add_f16_select_2_4:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x4400
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x4000, vcc_lo
-; GFX10-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 2, v0
+; GFX10-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v0, v3
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0
+; GFX10-GISEL-NEXT: v_add_f16_e32 v0, v0, v2
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_contract_mul_add_f16_select_2_4:
@@ -5572,10 +5345,13 @@ define half @v_contract_mul_add_f16_select_2_4(i32 %arg, half %x, half %y) {
; GFX11-GISEL-LABEL: v_contract_mul_add_f16_select_2_4:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x4400
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x4000, vcc_lo
-; GFX11-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
+; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v0, v3
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0
+; GFX11-GISEL-NEXT: v_add_f16_e32 v0, v0, v2
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
%select.pow2 = select i1 %cond, half 2.0, half 4.0
@@ -5598,11 +5374,10 @@ define half @v_contract_mul_add_f16_select_4_128(i32 %arg, half %x, half %y) {
; GFX9-GISEL-LABEL: v_contract_mul_add_f16_select_4_128:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x4400
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x5800
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 7, 2, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0
+; GFX9-GISEL-NEXT: v_add_f16_e32 v0, v0, v2
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_contract_mul_add_f16_select_4_128:
@@ -5617,10 +5392,10 @@ define half @v_contract_mul_add_f16_select_4_128(i32 %arg, half %x, half %y) {
; GFX10-GISEL-LABEL: v_contract_mul_add_f16_select_4_128:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x5800
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x4400, vcc_lo
-; GFX10-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 7, 2, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0
+; GFX10-GISEL-NEXT: v_add_f16_e32 v0, v0, v2
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_contract_mul_add_f16_select_4_128:
@@ -5635,10 +5410,10 @@ define half @v_contract_mul_add_f16_select_4_128(i32 %arg, half %x, half %y) {
; GFX11-GISEL-LABEL: v_contract_mul_add_f16_select_4_128:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x5800
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x4400, vcc_lo
-; GFX11-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 7, 2, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0
+; GFX11-GISEL-NEXT: v_add_f16_e32 v0, v0, v2
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
%select.pow2 = select i1 %cond, half 4.0, half 128.0
@@ -5664,15 +5439,13 @@ define <2 x half> @v_mul_v2f16_select_64_1(<2 x i32> %arg, <2 x half> %x) {
; GFX9-GISEL-LABEL: v_mul_v2f16_select_64_1:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x3c00
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
-; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 6, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX9-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_mul_v2f16_select_64_1:
@@ -5690,14 +5463,14 @@ define <2 x half> @v_mul_v2f16_select_64_1(<2 x i32> %arg, <2 x half> %x) {
; GFX10-GISEL-LABEL: v_mul_v2f16_select_64_1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x3c00
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x5400, vcc_lo
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 6, vcc_lo
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x5400, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX10-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_mul_v2f16_select_64_1:
@@ -5715,14 +5488,15 @@ define <2 x half> @v_mul_v2f16_select_64_1(<2 x i32> %arg, <2 x half> %x) {
; GFX11-GISEL-LABEL: v_mul_v2f16_select_64_1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x3c00
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x5400, vcc_lo
+; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 6, vcc_lo
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x5400, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v3, v1
; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX11-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq <2 x i32> %arg, zeroinitializer
%select.pow2 = select <2 x i1> %cond, <2 x half> <half 64.0, half 64.0>, <2 x half> <half 1.0, half 1.0>
@@ -5747,15 +5521,13 @@ define <2 x half> @v_mul_v2f16_select_1_64(<2 x i32> %arg, <2 x half> %x) {
; GFX9-GISEL-LABEL: v_mul_v2f16_select_1_64:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x3c00
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x5400
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
-; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 6, 0, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX9-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_mul_v2f16_select_1_64:
@@ -5773,14 +5545,14 @@ define <2 x half> @v_mul_v2f16_select_1_64(<2 x i32> %arg, <2 x half> %x) {
; GFX10-GISEL-LABEL: v_mul_v2f16_select_1_64:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x3c00, vcc_lo
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 6, 0, vcc_lo
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x3c00, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX10-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_mul_v2f16_select_1_64:
@@ -5798,14 +5570,15 @@ define <2 x half> @v_mul_v2f16_select_1_64(<2 x i32> %arg, <2 x half> %x) {
; GFX11-GISEL-LABEL: v_mul_v2f16_select_1_64:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x3c00, vcc_lo
+; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 6, 0, vcc_lo
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x3c00, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v3, v1
; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX11-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq <2 x i32> %arg, zeroinitializer
%select.pow2 = select <2 x i1> %cond, <2 x half> <half 1.0, half 1.0>, <2 x half> <half 64.0, half 64.0>
@@ -5830,15 +5603,14 @@ define <2 x half> @v_mul_v2f16_select_n1_n64(<2 x i32> %arg, <2 x half> %x) {
; GFX9-GISEL-LABEL: v_mul_v2f16_select_n1_n64:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xbc00
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xd400
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
-; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-GISEL-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 6, 0, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX9-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_mul_v2f16_select_n1_n64:
@@ -5856,14 +5628,15 @@ define <2 x half> @v_mul_v2f16_select_n1_n64(<2 x i32> %arg, <2 x half> %x) {
; GFX10-GISEL-LABEL: v_mul_v2f16_select_n1_n64:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0xd400
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0xbc00, vcc_lo
+; GFX10-GISEL-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 6, 0, vcc_lo
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xbc00, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX10-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_mul_v2f16_select_n1_n64:
@@ -5881,14 +5654,16 @@ define <2 x half> @v_mul_v2f16_select_n1_n64(<2 x i32> %arg, <2 x half> %x) {
; GFX11-GISEL-LABEL: v_mul_v2f16_select_n1_n64:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0xd400
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0xbc00, vcc_lo
+; GFX11-GISEL-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 6, 0, vcc_lo
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xbc00, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v3, v1
; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX11-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq <2 x i32> %arg, zeroinitializer
%select.pow2 = select <2 x i1> %cond, <2 x half> <half -1.0, half -1.0>, <2 x half> <half -64.0, half -64.0>
@@ -5913,15 +5688,19 @@ define <2 x half> @v_mul_v2f16_select_128_64(<2 x i32> %arg, <2 x half> %x) {
; GFX9-GISEL-LABEL: v_mul_v2f16_select_128_64:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x5800
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x5400
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
-; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 6, v0
+; GFX9-GISEL-NEXT: v_add_u32_e32 v1, 6, v1
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xffff8000
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x7fff
+; GFX9-GISEL-NEXT: v_med3_i32 v0, v0, v3, v4
+; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v3, v4
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX9-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_mul_v2f16_select_128_64:
@@ -5939,14 +5718,19 @@ define <2 x half> @v_mul_v2f16_select_128_64(<2 x i32> %arg, <2 x half> %x) {
; GFX10-GISEL-LABEL: v_mul_v2f16_select_128_64:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x5800, vcc_lo
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX10-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v0, v3
+; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 6, v1
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v3
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x5800, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX10-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_mul_v2f16_select_128_64:
@@ -5964,14 +5748,20 @@ define <2 x half> @v_mul_v2f16_select_128_64(<2 x i32> %arg, <2 x half> %x) {
; GFX11-GISEL-LABEL: v_mul_v2f16_select_128_64:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x5800, vcc_lo
+; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX11-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v0, v3
+; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, 6, v1
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v3
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x5800, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v4, v1
; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX11-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq <2 x i32> %arg, zeroinitializer
%select.pow2 = select <2 x i1> %cond, <2 x half> <half 128.0, half 128.0>, <2 x half> <half 64.0, half 64.0>
@@ -5996,15 +5786,20 @@ define <2 x half> @v_mul_v2f16_select_n128_n64(<2 x i32> %arg, <2 x half> %x) {
; GFX9-GISEL-LABEL: v_mul_v2f16_select_n128_n64:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xd800
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xd400
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
-; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 6, v0
+; GFX9-GISEL-NEXT: v_add_u32_e32 v1, 6, v1
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xffff8000
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x7fff
+; GFX9-GISEL-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX9-GISEL-NEXT: v_med3_i32 v0, v0, v3, v4
+; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v3, v4
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX9-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_mul_v2f16_select_n128_n64:
@@ -6022,14 +5817,20 @@ define <2 x half> @v_mul_v2f16_select_n128_n64(<2 x i32> %arg, <2 x half> %x) {
; GFX10-GISEL-LABEL: v_mul_v2f16_select_n128_n64:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0xd400
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0xd800, vcc_lo
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
+; GFX10-GISEL-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX10-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v0, v3
+; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 6, v1
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v3
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xd800, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX10-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_mul_v2f16_select_n128_n64:
@@ -6047,14 +5848,21 @@ define <2 x half> @v_mul_v2f16_select_n128_n64(<2 x i32> %arg, <2 x half> %x) {
; GFX11-GISEL-LABEL: v_mul_v2f16_select_n128_n64:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0xd400
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0xd800, vcc_lo
+; GFX11-GISEL-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX11-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v0, v3
+; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, 6, v1
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v3
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xd800, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v4, v1
; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX11-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq <2 x i32> %arg, zeroinitializer
%select.pow2 = select <2 x i1> %cond, <2 x half> <half -128.0, half -128.0>, <2 x half> <half -64.0, half -64.0>
@@ -6079,15 +5887,14 @@ define <2 x half> @v_mul_v2f16_select_n128_n16(<2 x i32> %arg, <2 x half> %x) {
; GFX9-GISEL-LABEL: v_mul_v2f16_select_n128_n16:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xd800
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xcc00
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
-; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-GISEL-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 4, 7, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX9-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_mul_v2f16_select_n128_n16:
@@ -6105,14 +5912,15 @@ define <2 x half> @v_mul_v2f16_select_n128_n16(<2 x i32> %arg, <2 x half> %x) {
; GFX10-GISEL-LABEL: v_mul_v2f16_select_n128_n16:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0xcc00
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0xd800, vcc_lo
+; GFX10-GISEL-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc_lo
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 4, 7, vcc_lo
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xd800, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX10-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_mul_v2f16_select_n128_n16:
@@ -6130,14 +5938,16 @@ define <2 x half> @v_mul_v2f16_select_n128_n16(<2 x i32> %arg, <2 x half> %x) {
; GFX11-GISEL-LABEL: v_mul_v2f16_select_n128_n16:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0xcc00
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0xd800, vcc_lo
+; GFX11-GISEL-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc_lo
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 4, 7, vcc_lo
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xd800, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v3, v1
; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX11-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq <2 x i32> %arg, zeroinitializer
%select.pow2 = select <2 x i1> %cond, <2 x half> <half -128.0, half -128.0>, <2 x half> <half -16.0, half -16.0>
@@ -6162,15 +5972,14 @@ define <2 x half> @v_contract_mul_add_v2f16_select_64_1(<2 x i32> %arg, <2 x hal
; GFX9-GISEL-LABEL: v_contract_mul_add_v2f16_select_64_1:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x5400
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x3c00
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 6, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX9-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
+; GFX9-GISEL-NEXT: v_pk_add_f16 v0, v0, v3
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_contract_mul_add_v2f16_select_64_1:
@@ -6188,14 +5997,15 @@ define <2 x half> @v_contract_mul_add_v2f16_select_64_1(<2 x i32> %arg, <2 x hal
; GFX10-GISEL-LABEL: v_contract_mul_add_v2f16_select_64_1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0x3c00
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x5400, vcc_lo
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 6, vcc_lo
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x5400, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX10-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
+; GFX10-GISEL-NEXT: v_pk_add_f16 v0, v0, v3
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_contract_mul_add_v2f16_select_64_1:
@@ -6213,14 +6023,16 @@ define <2 x half> @v_contract_mul_add_v2f16_select_64_1(<2 x i32> %arg, <2 x hal
; GFX11-GISEL-LABEL: v_contract_mul_add_v2f16_select_64_1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, 0x3c00
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x5400, vcc_lo
+; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 6, vcc_lo
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x5400, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v4, v1
; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX11-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
+; GFX11-GISEL-NEXT: v_pk_add_f16 v0, v0, v3
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq <2 x i32> %arg, zeroinitializer
%select.pow2 = select <2 x i1> %cond, <2 x half> <half 64.0, half 64.0>, <2 x half> <half 1.0, half 1.0>
@@ -6246,15 +6058,14 @@ define <2 x half> @v_contract_mul_add_v2f16_select_1_64(<2 x i32> %arg, <2 x hal
; GFX9-GISEL-LABEL: v_contract_mul_add_v2f16_select_1_64:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x3c00
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x5400
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 6, 0, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX9-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
+; GFX9-GISEL-NEXT: v_pk_add_f16 v0, v0, v3
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_contract_mul_add_v2f16_select_1_64:
@@ -6272,14 +6083,15 @@ define <2 x half> @v_contract_mul_add_v2f16_select_1_64(<2 x i32> %arg, <2 x hal
; GFX10-GISEL-LABEL: v_contract_mul_add_v2f16_select_1_64:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0x5400
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x3c00, vcc_lo
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 6, 0, vcc_lo
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x3c00, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX10-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
+; GFX10-GISEL-NEXT: v_pk_add_f16 v0, v0, v3
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_contract_mul_add_v2f16_select_1_64:
@@ -6297,14 +6109,16 @@ define <2 x half> @v_contract_mul_add_v2f16_select_1_64(<2 x i32> %arg, <2 x hal
; GFX11-GISEL-LABEL: v_contract_mul_add_v2f16_select_1_64:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, 0x5400
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x3c00, vcc_lo
+; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 6, 0, vcc_lo
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x3c00, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v4, v1
; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX11-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
+; GFX11-GISEL-NEXT: v_pk_add_f16 v0, v0, v3
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq <2 x i32> %arg, zeroinitializer
%select.pow2 = select <2 x i1> %cond, <2 x half> <half 1.0, half 1.0>, <2 x half> <half 64.0, half 64.0>
@@ -6330,15 +6144,15 @@ define <2 x half> @v_contract_mul_add_v2f16_select_n64_n1(<2 x i32> %arg, <2 x h
; GFX9-GISEL-LABEL: v_contract_mul_add_v2f16_select_n64_n1:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xd400
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0xbc00
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-GISEL-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 6, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX9-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
+; GFX9-GISEL-NEXT: v_pk_add_f16 v0, v0, v3
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_contract_mul_add_v2f16_select_n64_n1:
@@ -6356,14 +6170,16 @@ define <2 x half> @v_contract_mul_add_v2f16_select_n64_n1(<2 x i32> %arg, <2 x h
; GFX10-GISEL-LABEL: v_contract_mul_add_v2f16_select_n64_n1:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0xbc00
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0xd400, vcc_lo
+; GFX10-GISEL-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 6, vcc_lo
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0xd400, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX10-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
+; GFX10-GISEL-NEXT: v_pk_add_f16 v0, v0, v3
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_contract_mul_add_v2f16_select_n64_n1:
@@ -6381,14 +6197,17 @@ define <2 x half> @v_contract_mul_add_v2f16_select_n64_n1(<2 x i32> %arg, <2 x h
; GFX11-GISEL-LABEL: v_contract_mul_add_v2f16_select_n64_n1:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, 0xbc00
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0xd400, vcc_lo
+; GFX11-GISEL-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 6, vcc_lo
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0xd400, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v4, v1
; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX11-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
+; GFX11-GISEL-NEXT: v_pk_add_f16 v0, v0, v3
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq <2 x i32> %arg, zeroinitializer
%select.pow2 = select <2 x i1> %cond, <2 x half> <half -64.0, half -64.0>, <2 x half> <half -1.0, half -1.0>
@@ -6414,15 +6233,15 @@ define <2 x half> @v_contract_mul_add_v2f16_select_n1_n64(<2 x i32> %arg, <2 x h
; GFX9-GISEL-LABEL: v_contract_mul_add_v2f16_select_n1_n64:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xbc00
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0xd400
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-GISEL-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 6, 0, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX9-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
+; GFX9-GISEL-NEXT: v_pk_add_f16 v0, v0, v3
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_contract_mul_add_v2f16_select_n1_n64:
@@ -6440,14 +6259,16 @@ define <2 x half> @v_contract_mul_add_v2f16_select_n1_n64(<2 x i32> %arg, <2 x h
; GFX10-GISEL-LABEL: v_contract_mul_add_v2f16_select_n1_n64:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0xd400
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0xbc00, vcc_lo
+; GFX10-GISEL-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 6, 0, vcc_lo
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0xbc00, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX10-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
+; GFX10-GISEL-NEXT: v_pk_add_f16 v0, v0, v3
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_contract_mul_add_v2f16_select_n1_n64:
@@ -6465,14 +6286,17 @@ define <2 x half> @v_contract_mul_add_v2f16_select_n1_n64(<2 x i32> %arg, <2 x h
; GFX11-GISEL-LABEL: v_contract_mul_add_v2f16_select_n1_n64:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, 0xd400
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0xbc00, vcc_lo
+; GFX11-GISEL-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 6, 0, vcc_lo
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0xbc00, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v4, v1
; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX11-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
+; GFX11-GISEL-NEXT: v_pk_add_f16 v0, v0, v3
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq <2 x i32> %arg, zeroinitializer
%select.pow2 = select <2 x i1> %cond, <2 x half> <half -1.0, half -1.0>, <2 x half> <half -64.0, half -64.0>
@@ -6498,15 +6322,20 @@ define <2 x half> @v_contract_mul_add_v2f16_select_128_64(<2 x i32> %arg, <2 x h
; GFX9-GISEL-LABEL: v_contract_mul_add_v2f16_select_128_64:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x5800
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x5400
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 6, v0
+; GFX9-GISEL-NEXT: v_add_u32_e32 v1, 6, v1
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xffff8000
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x7fff
+; GFX9-GISEL-NEXT: v_med3_i32 v0, v0, v4, v5
+; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v4, v5
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX9-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
+; GFX9-GISEL-NEXT: v_pk_add_f16 v0, v0, v3
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_contract_mul_add_v2f16_select_128_64:
@@ -6524,14 +6353,20 @@ define <2 x half> @v_contract_mul_add_v2f16_select_128_64(<2 x i32> %arg, <2 x h
; GFX10-GISEL-LABEL: v_contract_mul_add_v2f16_select_128_64:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0x5400
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x5800, vcc_lo
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0x7fff
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX10-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v0, v4
+; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 6, v1
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v4
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x5800, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX10-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
+; GFX10-GISEL-NEXT: v_pk_add_f16 v0, v0, v3
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_contract_mul_add_v2f16_select_128_64:
@@ -6549,14 +6384,20 @@ define <2 x half> @v_contract_mul_add_v2f16_select_128_64(<2 x i32> %arg, <2 x h
; GFX11-GISEL-LABEL: v_contract_mul_add_v2f16_select_128_64:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, 0x5400
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x5800, vcc_lo
+; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX11-GISEL-NEXT: v_dual_mov_b32 v4, 0x7fff :: v_dual_add_nc_u32 v1, 6, v1
+; GFX11-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v0, v4
+; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v4
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v5, v1
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x5800, vcc_lo
; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX11-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
+; GFX11-GISEL-NEXT: v_pk_add_f16 v0, v0, v3
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq <2 x i32> %arg, zeroinitializer
%select.pow2 = select <2 x i1> %cond, <2 x half> <half 128.0, half 128.0>, <2 x half> <half 64.0, half 64.0>
@@ -6582,15 +6423,14 @@ define <2 x half> @v_contract_mul_add_v2f16_select_128_4(<2 x i32> %arg, <2 x ha
; GFX9-GISEL-LABEL: v_contract_mul_add_v2f16_select_128_4:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x5800
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x4400
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 2, 7, vcc
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 2, 7, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX9-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
+; GFX9-GISEL-NEXT: v_pk_add_f16 v0, v0, v3
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_contract_mul_add_v2f16_select_128_4:
@@ -6608,14 +6448,15 @@ define <2 x half> @v_contract_mul_add_v2f16_select_128_4(<2 x i32> %arg, <2 x ha
; GFX10-GISEL-LABEL: v_contract_mul_add_v2f16_select_128_4:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0x4400
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x5800, vcc_lo
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 2, 7, vcc_lo
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 2, 7, vcc_lo
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x5800, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX10-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
+; GFX10-GISEL-NEXT: v_pk_add_f16 v0, v0, v3
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_contract_mul_add_v2f16_select_128_4:
@@ -6633,14 +6474,16 @@ define <2 x half> @v_contract_mul_add_v2f16_select_128_4(<2 x i32> %arg, <2 x ha
; GFX11-GISEL-LABEL: v_contract_mul_add_v2f16_select_128_4:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, 0x4400
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x5800, vcc_lo
+; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 2, 7, vcc_lo
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 2, 7, vcc_lo
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x5800, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v4, v1
; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX11-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
+; GFX11-GISEL-NEXT: v_pk_add_f16 v0, v0, v3
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq <2 x i32> %arg, zeroinitializer
%select.pow2 = select <2 x i1> %cond, <2 x half> <half 128.0, half 128.0>, <2 x half> <half 4.0, half 4.0>
@@ -6666,15 +6509,20 @@ define <2 x half> @v_contract_mul_add_v2f16_select_2_4(<2 x i32> %arg, <2 x half
; GFX9-GISEL-LABEL: v_contract_mul_add_v2f16_select_2_4:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x4000
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x4400
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
+; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 2, v0
+; GFX9-GISEL-NEXT: v_add_u32_e32 v1, 2, v1
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xffff8000
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x7fff
+; GFX9-GISEL-NEXT: v_med3_i32 v0, v0, v4, v5
+; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v4, v5
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX9-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
+; GFX9-GISEL-NEXT: v_pk_add_f16 v0, v0, v3
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_contract_mul_add_v2f16_select_2_4:
@@ -6692,14 +6540,20 @@ define <2 x half> @v_contract_mul_add_v2f16_select_2_4(<2 x i32> %arg, <2 x half
; GFX10-GISEL-LABEL: v_contract_mul_add_v2f16_select_2_4:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0x4400
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x4000, vcc_lo
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0x7fff
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 2, v0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX10-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v0, v4
+; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 2, v1
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v4
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x4000, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX10-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
+; GFX10-GISEL-NEXT: v_pk_add_f16 v0, v0, v3
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_contract_mul_add_v2f16_select_2_4:
@@ -6717,14 +6571,20 @@ define <2 x half> @v_contract_mul_add_v2f16_select_2_4(<2 x i32> %arg, <2 x half
; GFX11-GISEL-LABEL: v_contract_mul_add_v2f16_select_2_4:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, 0x4400
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x4000, vcc_lo
+; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
+; GFX11-GISEL-NEXT: v_dual_mov_b32 v4, 0x7fff :: v_dual_add_nc_u32 v1, 2, v1
+; GFX11-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v0, v4
+; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v4
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v5, v1
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x4000, vcc_lo
; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX11-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
+; GFX11-GISEL-NEXT: v_pk_add_f16 v0, v0, v3
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq <2 x i32> %arg, zeroinitializer
%select.pow2 = select <2 x i1> %cond, <2 x half> <half 2.0, half 2.0>, <2 x half> <half 4.0, half 4.0>
@@ -6750,15 +6610,14 @@ define <2 x half> @v_contract_mul_add_v2f16_select_4_128(<2 x i32> %arg, <2 x ha
; GFX9-GISEL-LABEL: v_contract_mul_add_v2f16_select_4_128:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x4400
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x5800
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 7, 2, vcc
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
-; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 7, 2, vcc
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX9-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
+; GFX9-GISEL-NEXT: v_pk_add_f16 v0, v0, v3
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_contract_mul_add_v2f16_select_4_128:
@@ -6776,14 +6635,15 @@ define <2 x half> @v_contract_mul_add_v2f16_select_4_128(<2 x i32> %arg, <2 x ha
; GFX10-GISEL-LABEL: v_contract_mul_add_v2f16_select_4_128:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0x5800
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x4400, vcc_lo
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 7, 2, vcc_lo
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 7, 2, vcc_lo
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x4400, vcc_lo
+; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX10-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
+; GFX10-GISEL-NEXT: v_pk_add_f16 v0, v0, v3
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_contract_mul_add_v2f16_select_4_128:
@@ -6801,14 +6661,16 @@ define <2 x half> @v_contract_mul_add_v2f16_select_4_128(<2 x i32> %arg, <2 x ha
; GFX11-GISEL-LABEL: v_contract_mul_add_v2f16_select_4_128:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, 0x5800
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x4400, vcc_lo
+; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 7, 2, vcc_lo
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
+; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 7, 2, vcc_lo
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x4400, vcc_lo
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v4, v1
; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX11-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3
+; GFX11-GISEL-NEXT: v_pk_add_f16 v0, v0, v3
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq <2 x i32> %arg, zeroinitializer
%select.pow2 = select <2 x i1> %cond, <2 x half> <half 4.0, half 4.0>, <2 x half> <half 128.0, half 128.0>
diff --git a/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll b/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll
index b3001819e9aafd..c1d5b5857b6b53 100644
--- a/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll
@@ -2380,14 +2380,12 @@ define float @v_sqrt_f32_ulp2_contractable_rcp(float %x) {
; GISEL-IEEE: ; %bb.0:
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000
-; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x4b800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GISEL-IEEE-NEXT: v_mul_f32_e32 v0, v0, v1
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 24, vcc
+; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GISEL-IEEE-NEXT: v_rsq_f32_e32 v0, v0
-; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x45800000
-; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GISEL-IEEE-NEXT: v_mul_f32_e32 v0, v0, v1
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 12, vcc
+; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GCN-DAZ-LABEL: v_sqrt_f32_ulp2_contractable_rcp:
@@ -2734,20 +2732,18 @@ define <2 x float> @v_sqrt_v2f32_ulp2_contractable_rcp(<2 x float> %x) {
; GISEL-IEEE: ; %bb.0:
; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x800000
-; GISEL-IEEE-NEXT: v_mov_b32_e32 v3, 0x4b800000
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, 0, 24, vcc
; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v2
-; GISEL-IEEE-NEXT: v_mul_f32_e32 v0, v0, v4
-; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[4:5]
+; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v3
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 24, s[4:5]
; GISEL-IEEE-NEXT: v_rsq_f32_e32 v0, v0
-; GISEL-IEEE-NEXT: v_mul_f32_e32 v1, v1, v2
+; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2
; GISEL-IEEE-NEXT: v_rsq_f32_e32 v1, v1
-; GISEL-IEEE-NEXT: v_mov_b32_e32 v4, 0x45800000
-; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, 1.0, v4, vcc
-; GISEL-IEEE-NEXT: v_mul_f32_e32 v0, v0, v2
-; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 1.0, v4, s[4:5]
-; GISEL-IEEE-NEXT: v_mul_f32_e32 v1, v1, v2
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 12, vcc
+; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2
+; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 12, s[4:5]
+; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2
; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31]
;
; GCN-DAZ-LABEL: v_sqrt_v2f32_ulp2_contractable_rcp:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll
index ac515808a0d8a7..333d428c84bcca 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll
@@ -41,10 +41,10 @@ define amdgpu_kernel void @s_exp2_f32(ptr addrspace(1) %out, float %in) {
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-GISEL-NEXT: v_add_f32_e32 v0, s2, v0
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; SI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; SI-GISEL-NEXT: s_mov_b32 s2, -1
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
;
@@ -78,9 +78,9 @@ define amdgpu_kernel void @s_exp2_f32(ptr addrspace(1) %out, float %in) {
; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; VI-GISEL-NEXT: v_add_f32_e32 v0, s2, v0
; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_ldexp_f32 v2, v0, v1
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
@@ -115,9 +115,9 @@ define amdgpu_kernel void @s_exp2_f32(ptr addrspace(1) %out, float %in) {
; GFX900-GISEL-NEXT: v_add_f32_e32 v0, s0, v0
; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
; GFX900-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
@@ -203,7 +203,7 @@ define amdgpu_kernel void @s_exp2_v2f32(ptr addrspace(1) %out, <2 x float> %in)
; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x1f800000
+; SI-GISEL-NEXT: v_not_b32_e32 v2, 63
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v0
@@ -213,10 +213,10 @@ define amdgpu_kernel void @s_exp2_v2f32(ptr addrspace(1) %out, <2 x float> %in)
; SI-GISEL-NEXT: v_add_f32_e32 v0, s7, v0
; SI-GISEL-NEXT: v_exp_f32_e32 v3, v3
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v2, s[0:1]
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v3, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1]
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v3, v0
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: s_mov_b32 s6, -1
; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
@@ -252,7 +252,7 @@ define amdgpu_kernel void @s_exp2_v2f32(ptr addrspace(1) %out, <2 x float> %in)
; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24
; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x1f800000
+; VI-GISEL-NEXT: v_not_b32_e32 v2, 63
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v0
@@ -262,10 +262,10 @@ define amdgpu_kernel void @s_exp2_v2f32(ptr addrspace(1) %out, <2 x float> %in)
; VI-GISEL-NEXT: v_add_f32_e32 v0, s7, v0
; VI-GISEL-NEXT: v_exp_f32_e32 v3, v3
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v0
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v2, s[0:1]
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, v3, v0
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1]
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v3, v0
+; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
@@ -300,7 +300,7 @@ define amdgpu_kernel void @s_exp2_v2f32(ptr addrspace(1) %out, <2 x float> %in)
; GFX900-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x1f800000
+; GFX900-GISEL-NEXT: v_not_b32_e32 v2, 63
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v0
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v0
@@ -310,10 +310,10 @@ define amdgpu_kernel void @s_exp2_v2f32(ptr addrspace(1) %out, <2 x float> %in)
; GFX900-GISEL-NEXT: v_add_f32_e32 v0, s11, v0
; GFX900-GISEL-NEXT: v_exp_f32_e32 v3, v3
; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v0
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v2, s[0:1]
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v3, v0
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1]
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v3, v0
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
; GFX900-GISEL-NEXT: s_endpgm
@@ -421,17 +421,17 @@ define amdgpu_kernel void @s_exp2_v3f32(ptr addrspace(1) %out, <3 x float> %in)
; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xd
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x1f800000
+; SI-GISEL-NEXT: v_not_b32_e32 v3, 63
; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
; SI-GISEL-NEXT: v_add_f32_e32 v0, s0, v0
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v1
; SI-GISEL-NEXT: s_mov_b32 s6, -1
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v4
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v4
; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc
; SI-GISEL-NEXT: v_add_f32_e32 v4, s1, v4
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s2, v1
@@ -439,11 +439,11 @@ define amdgpu_kernel void @s_exp2_v3f32(ptr addrspace(1) %out, <3 x float> %in)
; SI-GISEL-NEXT: v_exp_f32_e32 v4, v4
; SI-GISEL-NEXT: v_add_f32_e32 v1, s2, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v2, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, v4, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, v3, s[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v4, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[0:1]
; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
-; SI-GISEL-NEXT: v_mul_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, v2, v3
; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-GISEL-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8
; SI-GISEL-NEXT: s_endpgm
@@ -487,16 +487,16 @@ define amdgpu_kernel void @s_exp2_v3f32(ptr addrspace(1) %out, <3 x float> %in)
; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x1f800000
+; VI-GISEL-NEXT: v_not_b32_e32 v3, 63
; VI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1
; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
; VI-GISEL-NEXT: v_add_f32_e32 v0, s0, v0
; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v4
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v4
; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc
; VI-GISEL-NEXT: v_add_f32_e32 v4, s1, v4
; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s2, v1
@@ -504,10 +504,10 @@ define amdgpu_kernel void @s_exp2_v3f32(ptr addrspace(1) %out, <3 x float> %in)
; VI-GISEL-NEXT: v_add_f32_e32 v1, s2, v1
; VI-GISEL-NEXT: v_exp_f32_e32 v4, v4
; VI-GISEL-NEXT: v_exp_f32_e32 v2, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, v3, s[0:1]
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, v4, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[0:1]
+; VI-GISEL-NEXT: v_ldexp_f32 v1, v4, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v2, v2, v3
; VI-GISEL-NEXT: v_mov_b32_e32 v3, s4
; VI-GISEL-NEXT: v_mov_b32_e32 v4, s5
; VI-GISEL-NEXT: flat_store_dwordx3 v[3:4], v[0:2]
@@ -551,15 +551,15 @@ define amdgpu_kernel void @s_exp2_v3f32(ptr addrspace(1) %out, <3 x float> %in)
; GFX900-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x1f800000
+; GFX900-GISEL-NEXT: v_not_b32_e32 v3, 63
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
; GFX900-GISEL-NEXT: v_add_f32_e32 v0, s0, v0
; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v1
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v4
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v4
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc
; GFX900-GISEL-NEXT: v_add_f32_e32 v4, s1, v4
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s2, v1
@@ -567,10 +567,10 @@ define amdgpu_kernel void @s_exp2_v3f32(ptr addrspace(1) %out, <3 x float> %in)
; GFX900-GISEL-NEXT: v_add_f32_e32 v1, s2, v1
; GFX900-GISEL-NEXT: v_exp_f32_e32 v4, v4
; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, v3, s[0:1]
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, v4, v1
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, v2, v3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[0:1]
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v4, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v2, v2, v3
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0
; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7]
; GFX900-GISEL-NEXT: s_endpgm
@@ -710,7 +710,7 @@ define amdgpu_kernel void @s_exp2_v4f32(ptr addrspace(1) %out, <4 x float> %in)
; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x1f800000
+; SI-GISEL-NEXT: v_not_b32_e32 v4, 63
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v2
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
@@ -720,22 +720,22 @@ define amdgpu_kernel void @s_exp2_v4f32(ptr addrspace(1) %out, <4 x float> %in)
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
; SI-GISEL-NEXT: v_add_f32_e32 v1, s9, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v5
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v5, 1.0, v4, s[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v5
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, v4, s[0:1]
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v5
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v5
; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc
; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1]
; SI-GISEL-NEXT: v_add_f32_e32 v5, s10, v5
; SI-GISEL-NEXT: v_add_f32_e32 v2, s11, v2
; SI-GISEL-NEXT: v_exp_f32_e32 v5, v5
; SI-GISEL-NEXT: v_exp_f32_e32 v3, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v4, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 1.0, v4, s[0:1]
-; SI-GISEL-NEXT: v_mul_f32_e32 v2, v5, v2
-; SI-GISEL-NEXT: v_mul_f32_e32 v3, v3, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1]
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, v5, v2
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v3, v3, v4
; SI-GISEL-NEXT: s_mov_b32 s6, -1
; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
@@ -787,7 +787,7 @@ define amdgpu_kernel void @s_exp2_v4f32(ptr addrspace(1) %out, <4 x float> %in)
; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x1f800000
+; VI-GISEL-NEXT: v_not_b32_e32 v4, 63
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v2
; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
@@ -797,22 +797,22 @@ define amdgpu_kernel void @s_exp2_v4f32(ptr addrspace(1) %out, <4 x float> %in)
; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
; VI-GISEL-NEXT: v_add_f32_e32 v1, s9, v1
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v5
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v5, 1.0, v4, s[0:1]
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v5
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, v4, s[0:1]
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2
; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v5
+; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v5
; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc
; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1]
; VI-GISEL-NEXT: v_add_f32_e32 v5, s10, v5
; VI-GISEL-NEXT: v_add_f32_e32 v2, s11, v2
; VI-GISEL-NEXT: v_exp_f32_e32 v5, v5
; VI-GISEL-NEXT: v_exp_f32_e32 v3, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v4, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 1.0, v4, s[0:1]
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, v5, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, v3, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1]
+; VI-GISEL-NEXT: v_ldexp_f32 v2, v5, v2
+; VI-GISEL-NEXT: v_ldexp_f32 v3, v3, v4
; VI-GISEL-NEXT: v_mov_b32_e32 v5, s3
; VI-GISEL-NEXT: v_mov_b32_e32 v4, s2
; VI-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
@@ -863,7 +863,7 @@ define amdgpu_kernel void @s_exp2_v4f32(ptr addrspace(1) %out, <4 x float> %in)
; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x1f800000
+; GFX900-GISEL-NEXT: v_not_b32_e32 v4, 63
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v2
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
@@ -873,22 +873,22 @@ define amdgpu_kernel void @s_exp2_v4f32(ptr addrspace(1) %out, <4 x float> %in)
; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_add_f32_e32 v1, s9, v1
; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v5
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v5, 1.0, v4, s[0:1]
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v5
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, v4, s[0:1]
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, v1, v5
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v5
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc
; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1]
; GFX900-GISEL-NEXT: v_add_f32_e32 v5, s10, v5
; GFX900-GISEL-NEXT: v_add_f32_e32 v2, s11, v2
; GFX900-GISEL-NEXT: v_exp_f32_e32 v5, v5
; GFX900-GISEL-NEXT: v_exp_f32_e32 v3, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v4, vcc
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 1.0, v4, s[0:1]
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, v5, v2
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, v3, v4
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1]
+; GFX900-GISEL-NEXT: v_ldexp_f32 v2, v5, v2
+; GFX900-GISEL-NEXT: v_ldexp_f32 v3, v3, v4
; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0
; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3]
; GFX900-GISEL-NEXT: s_endpgm
@@ -1006,19 +1006,19 @@ define float @v_exp2_f32(float %in) {
; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp2_f32:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_exp2_f32:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_exp2_f32:
; VI-SDAG: ; %bb.0:
@@ -1034,6 +1034,20 @@ define float @v_exp2_f32(float %in) {
; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_exp2_f32:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_exp2_f32:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1048,6 +1062,20 @@ define float @v_exp2_f32(float %in) {
; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_exp2_f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; R600-LABEL: v_exp2_f32:
; R600: ; %bb.0:
; R600-NEXT: CF_END
@@ -1076,19 +1104,19 @@ define float @v_exp2_fabs_f32(float %in) {
; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp2_fabs_f32:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; GCN-GISEL-NEXT: v_add_f32_e64 v0, |v0|, v1
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_exp2_fabs_f32:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e64 v0, |v0|, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_exp2_fabs_f32:
; VI-SDAG: ; %bb.0:
@@ -1104,6 +1132,20 @@ define float @v_exp2_fabs_f32(float %in) {
; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_exp2_fabs_f32:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; VI-GISEL-NEXT: v_add_f32_e64 v0, |v0|, v1
+; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_exp2_fabs_f32:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1118,6 +1160,20 @@ define float @v_exp2_fabs_f32(float %in) {
; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_exp2_fabs_f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_add_f32_e64 v0, |v0|, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; R600-LABEL: v_exp2_fabs_f32:
; R600: ; %bb.0:
; R600-NEXT: CF_END
@@ -1147,19 +1203,19 @@ define float @v_exp2_fneg_fabs_f32(float %in) {
; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp2_fneg_fabs_f32:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; GCN-GISEL-NEXT: v_sub_f32_e64 v0, v1, |v0|
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_exp2_fneg_fabs_f32:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_sub_f32_e64 v0, v1, |v0|
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_exp2_fneg_fabs_f32:
; VI-SDAG: ; %bb.0:
@@ -1175,6 +1231,20 @@ define float @v_exp2_fneg_fabs_f32(float %in) {
; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_exp2_fneg_fabs_f32:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; VI-GISEL-NEXT: v_sub_f32_e64 v0, v1, |v0|
+; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_exp2_fneg_fabs_f32:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1189,6 +1259,20 @@ define float @v_exp2_fneg_fabs_f32(float %in) {
; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_exp2_fneg_fabs_f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e64 v0, v1, |v0|
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; R600-LABEL: v_exp2_fneg_fabs_f32:
; R600: ; %bb.0:
; R600-NEXT: CF_END
@@ -1219,19 +1303,19 @@ define float @v_exp2_fneg_f32(float %in) {
; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp2_fneg_f32:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; GCN-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_exp2_fneg_f32:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_exp2_fneg_f32:
; VI-SDAG: ; %bb.0:
@@ -1247,6 +1331,20 @@ define float @v_exp2_fneg_f32(float %in) {
; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_exp2_fneg_f32:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0
+; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_exp2_fneg_f32:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1261,6 +1359,20 @@ define float @v_exp2_fneg_f32(float %in) {
; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_exp2_fneg_f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; R600-LABEL: v_exp2_fneg_f32:
; R600: ; %bb.0:
; R600-NEXT: CF_END
@@ -1290,19 +1402,19 @@ define float @v_exp2_f32_fast(float %in) {
; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp2_f32_fast:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_exp2_f32_fast:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_exp2_f32_fast:
; VI-SDAG: ; %bb.0:
@@ -1318,6 +1430,20 @@ define float @v_exp2_f32_fast(float %in) {
; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_exp2_f32_fast:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_exp2_f32_fast:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1332,6 +1458,20 @@ define float @v_exp2_f32_fast(float %in) {
; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_exp2_f32_fast:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; R600-LABEL: v_exp2_f32_fast:
; R600: ; %bb.0:
; R600-NEXT: CF_END
@@ -1360,19 +1500,19 @@ define float @v_exp2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp2_f32_unsafe_math_attr:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_exp2_f32_unsafe_math_attr:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_exp2_f32_unsafe_math_attr:
; VI-SDAG: ; %bb.0:
@@ -1388,6 +1528,20 @@ define float @v_exp2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_exp2_f32_unsafe_math_attr:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_exp2_f32_unsafe_math_attr:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1402,6 +1556,20 @@ define float @v_exp2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_exp2_f32_unsafe_math_attr:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; R600-LABEL: v_exp2_f32_unsafe_math_attr:
; R600: ; %bb.0:
; R600-NEXT: CF_END
@@ -1430,19 +1598,19 @@ define float @v_exp2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true"
; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp2_f32_approx_fn_attr:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_exp2_f32_approx_fn_attr:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_exp2_f32_approx_fn_attr:
; VI-SDAG: ; %bb.0:
@@ -1458,6 +1626,20 @@ define float @v_exp2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true"
; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_exp2_f32_approx_fn_attr:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_exp2_f32_approx_fn_attr:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1472,6 +1654,20 @@ define float @v_exp2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true"
; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_exp2_f32_approx_fn_attr:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; R600-LABEL: v_exp2_f32_approx_fn_attr:
; R600: ; %bb.0:
; R600-NEXT: CF_END
@@ -1500,19 +1696,19 @@ define float @v_exp2_f32_ninf(float %in) {
; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp2_f32_ninf:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_exp2_f32_ninf:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_exp2_f32_ninf:
; VI-SDAG: ; %bb.0:
@@ -1528,6 +1724,20 @@ define float @v_exp2_f32_ninf(float %in) {
; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_exp2_f32_ninf:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_exp2_f32_ninf:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1542,6 +1752,20 @@ define float @v_exp2_f32_ninf(float %in) {
; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_exp2_f32_ninf:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; R600-LABEL: v_exp2_f32_ninf:
; R600: ; %bb.0:
; R600-NEXT: CF_END
@@ -1570,19 +1794,19 @@ define float @v_exp2_f32_afn(float %in) {
; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp2_f32_afn:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_exp2_f32_afn:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_exp2_f32_afn:
; VI-SDAG: ; %bb.0:
@@ -1598,6 +1822,20 @@ define float @v_exp2_f32_afn(float %in) {
; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_exp2_f32_afn:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_exp2_f32_afn:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1612,6 +1850,20 @@ define float @v_exp2_f32_afn(float %in) {
; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_exp2_f32_afn:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; R600-LABEL: v_exp2_f32_afn:
; R600: ; %bb.0:
; R600-NEXT: CF_END
@@ -1660,19 +1912,19 @@ define float @v_exp2_f32_afn_dynamic(float %in) #1 {
; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp2_f32_afn_dynamic:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_exp2_f32_afn_dynamic:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_exp2_f32_afn_dynamic:
; VI-SDAG: ; %bb.0:
@@ -1688,6 +1940,20 @@ define float @v_exp2_f32_afn_dynamic(float %in) #1 {
; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_exp2_f32_afn_dynamic:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_exp2_f32_afn_dynamic:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1702,6 +1968,20 @@ define float @v_exp2_f32_afn_dynamic(float %in) #1 {
; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_exp2_f32_afn_dynamic:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; R600-LABEL: v_exp2_f32_afn_dynamic:
; R600: ; %bb.0:
; R600-NEXT: CF_END
@@ -1730,19 +2010,19 @@ define float @v_fabs_exp2_f32_afn(float %in) {
; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_fabs_exp2_f32_afn:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; GCN-GISEL-NEXT: v_add_f32_e64 v0, |v0|, v1
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_fabs_exp2_f32_afn:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e64 v0, |v0|, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_fabs_exp2_f32_afn:
; VI-SDAG: ; %bb.0:
@@ -1758,6 +2038,20 @@ define float @v_fabs_exp2_f32_afn(float %in) {
; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_fabs_exp2_f32_afn:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; VI-GISEL-NEXT: v_add_f32_e64 v0, |v0|, v1
+; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_fabs_exp2_f32_afn:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1772,6 +2066,20 @@ define float @v_fabs_exp2_f32_afn(float %in) {
; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_fabs_exp2_f32_afn:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_add_f32_e64 v0, |v0|, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; R600-LABEL: v_fabs_exp2_f32_afn:
; R600: ; %bb.0:
; R600-NEXT: CF_END
@@ -1821,19 +2129,19 @@ define float @v_exp2_f32_nnan(float %in) {
; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp2_f32_nnan:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_exp2_f32_nnan:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_exp2_f32_nnan:
; VI-SDAG: ; %bb.0:
@@ -1849,6 +2157,20 @@ define float @v_exp2_f32_nnan(float %in) {
; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_exp2_f32_nnan:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_exp2_f32_nnan:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1863,6 +2185,20 @@ define float @v_exp2_f32_nnan(float %in) {
; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_exp2_f32_nnan:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; R600-LABEL: v_exp2_f32_nnan:
; R600: ; %bb.0:
; R600-NEXT: CF_END
@@ -1911,19 +2247,19 @@ define float @v_exp2_f32_nnan_dynamic(float %in) #1 {
; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp2_f32_nnan_dynamic:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_exp2_f32_nnan_dynamic:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_exp2_f32_nnan_dynamic:
; VI-SDAG: ; %bb.0:
@@ -1939,6 +2275,20 @@ define float @v_exp2_f32_nnan_dynamic(float %in) #1 {
; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_exp2_f32_nnan_dynamic:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_exp2_f32_nnan_dynamic:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1953,6 +2303,20 @@ define float @v_exp2_f32_nnan_dynamic(float %in) #1 {
; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_exp2_f32_nnan_dynamic:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; R600-LABEL: v_exp2_f32_nnan_dynamic:
; R600: ; %bb.0:
; R600-NEXT: CF_END
@@ -2001,19 +2365,19 @@ define float @v_exp2_f32_ninf_dynamic(float %in) #1 {
; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp2_f32_ninf_dynamic:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_exp2_f32_ninf_dynamic:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_exp2_f32_ninf_dynamic:
; VI-SDAG: ; %bb.0:
@@ -2029,6 +2393,20 @@ define float @v_exp2_f32_ninf_dynamic(float %in) #1 {
; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_exp2_f32_ninf_dynamic:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_exp2_f32_ninf_dynamic:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2043,6 +2421,20 @@ define float @v_exp2_f32_ninf_dynamic(float %in) #1 {
; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_exp2_f32_ninf_dynamic:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; R600-LABEL: v_exp2_f32_ninf_dynamic:
; R600: ; %bb.0:
; R600-NEXT: CF_END
@@ -2071,19 +2463,19 @@ define float @v_exp2_f32_nnan_ninf(float %in) {
; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp2_f32_nnan_ninf:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_exp2_f32_nnan_ninf:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_exp2_f32_nnan_ninf:
; VI-SDAG: ; %bb.0:
@@ -2099,6 +2491,20 @@ define float @v_exp2_f32_nnan_ninf(float %in) {
; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_exp2_f32_nnan_ninf:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_exp2_f32_nnan_ninf:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2113,6 +2519,20 @@ define float @v_exp2_f32_nnan_ninf(float %in) {
; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_exp2_f32_nnan_ninf:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; R600-LABEL: v_exp2_f32_nnan_ninf:
; R600: ; %bb.0:
; R600-NEXT: CF_END
@@ -2161,19 +2581,19 @@ define float @v_exp2_f32_nnan_ninf_dynamic(float %in) #1 {
; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp2_f32_nnan_ninf_dynamic:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_exp2_f32_nnan_ninf_dynamic:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_exp2_f32_nnan_ninf_dynamic:
; VI-SDAG: ; %bb.0:
@@ -2189,6 +2609,20 @@ define float @v_exp2_f32_nnan_ninf_dynamic(float %in) #1 {
; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_exp2_f32_nnan_ninf_dynamic:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_exp2_f32_nnan_ninf_dynamic:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2203,6 +2637,20 @@ define float @v_exp2_f32_nnan_ninf_dynamic(float %in) #1 {
; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_exp2_f32_nnan_ninf_dynamic:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; R600-LABEL: v_exp2_f32_nnan_ninf_dynamic:
; R600: ; %bb.0:
; R600-NEXT: CF_END
@@ -2251,19 +2699,19 @@ define float @v_exp2_f32_dynamic_mode(float %in) #1 {
; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp2_f32_dynamic_mode:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_exp2_f32_dynamic_mode:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_exp2_f32_dynamic_mode:
; VI-SDAG: ; %bb.0:
@@ -2279,6 +2727,20 @@ define float @v_exp2_f32_dynamic_mode(float %in) #1 {
; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_exp2_f32_dynamic_mode:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_exp2_f32_dynamic_mode:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2293,6 +2755,20 @@ define float @v_exp2_f32_dynamic_mode(float %in) #1 {
; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_exp2_f32_dynamic_mode:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; R600-LABEL: v_exp2_f32_dynamic_mode:
; R600: ; %bb.0:
; R600-NEXT: CF_END
@@ -2313,20 +2789,50 @@ define float @v_exp2_f32_undef() {
; GCN-SDAG-NEXT: v_exp_f32_e32 v0, 0x7fc00000
; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp2_f32_undef:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x42800000
-; GCN-GISEL-NEXT: v_add_f32_e32 v1, s4, v1
-; GCN-GISEL-NEXT: v_add_f32_e64 v2, s4, 0
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_exp2_f32_undef:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42800000
+; SI-GISEL-NEXT: v_add_f32_e32 v1, s4, v1
+; SI-GISEL-NEXT: v_add_f32_e64 v2, s4, 0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_exp2_f32_undef:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42800000
+; VI-GISEL-NEXT: v_add_f32_e32 v1, s4, v1
+; VI-GISEL-NEXT: v_add_f32_e64 v2, s4, 0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_exp2_f32_undef:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0xc2fc0000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42800000
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, s4, v1
+; GFX900-GISEL-NEXT: v_add_f32_e64 v2, s4, 0
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp2_f32_undef:
; R600: ; %bb.0:
@@ -3359,19 +3865,19 @@ define float @v_exp2_f32_contract(float %in) {
; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp2_f32_contract:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_exp2_f32_contract:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_exp2_f32_contract:
; VI-SDAG: ; %bb.0:
@@ -3387,6 +3893,20 @@ define float @v_exp2_f32_contract(float %in) {
; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_exp2_f32_contract:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_exp2_f32_contract:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3401,6 +3921,20 @@ define float @v_exp2_f32_contract(float %in) {
; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_exp2_f32_contract:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; R600-LABEL: v_exp2_f32_contract:
; R600: ; %bb.0:
; R600-NEXT: CF_END
@@ -3449,19 +3983,19 @@ define float @v_exp2_f32_contract_nnan_ninf(float %in) {
; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GCN-GISEL-LABEL: v_exp2_f32_contract_nnan_ninf:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
-; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
-; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_exp2_f32_contract_nnan_ninf:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_exp2_f32_contract_nnan_ninf:
; VI-SDAG: ; %bb.0:
@@ -3477,6 +4011,20 @@ define float @v_exp2_f32_contract_nnan_ninf(float %in) {
; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_exp2_f32_contract_nnan_ninf:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_not_b32_e32 v1, 63
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_exp2_f32_contract_nnan_ninf:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3491,6 +4039,20 @@ define float @v_exp2_f32_contract_nnan_ninf(float %in) {
; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_exp2_f32_contract_nnan_ninf:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; R600-LABEL: v_exp2_f32_contract_nnan_ninf:
; R600: ; %bb.0:
; R600-NEXT: CF_END
@@ -3518,3 +4080,5 @@ declare <3 x half> @llvm.exp2.v3f16(<3 x half>) #2
attributes #0 = { "denormal-fp-math-f32"="ieee,preserve-sign" }
attributes #1 = { "denormal-fp-math-f32"="dynamic,dynamic" }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GCN-GISEL: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log.ll b/llvm/test/CodeGen/AMDGPU/llvm.log.ll
index 218e41faa703dc..b850428a03c05e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log.ll
@@ -45,16 +45,17 @@ define amdgpu_kernel void @s_log_f32(ptr addrspace(1) %out, float %in) {
; SI-GISEL-NEXT: s_load_dword s0, s[4:5], 0xb
; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, s0, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, s0, v0
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; SI-GISEL-NEXT: s_mov_b32 s6, -1
+; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
; SI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317217, v0
; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v4
; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
@@ -64,7 +65,6 @@ define amdgpu_kernel void @s_log_f32(ptr addrspace(1) %out, float %in) {
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-GISEL-NEXT: s_endpgm
;
@@ -104,25 +104,25 @@ define amdgpu_kernel void @s_log_f32(ptr addrspace(1) %out, float %in) {
; VI-GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c
; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24
; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, s0, v0
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; VI-GISEL-NEXT: v_ldexp_f32 v0, s0, v0
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
-; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2
; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
+; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
-; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v1
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1]
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
@@ -162,25 +162,25 @@ define amdgpu_kernel void @s_log_f32(ptr addrspace(1) %out, float %in) {
; GFX900-GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c
; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24
; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3f317217
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s0, v0
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, s0, v0
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317217, v0
-; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v4
-; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
-; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v4, v1
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v3
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v5, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v2, -v5
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v5, v2
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v4
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1]
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x41b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[2:3]
; GFX900-GISEL-NEXT: s_endpgm
;
@@ -218,24 +218,26 @@ define amdgpu_kernel void @s_log_f32(ptr addrspace(1) %out, float %in) {
; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s2, 0x800000, s0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s2
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, s0, v0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s0, v0
; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
-; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 0x41b17218, s2
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_dual_cndmask_b32 v0, v0, v1 :: v_dual_mov_b32 v1, 0
-; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_dual_add_f32 v1, v1, v2 :: v_dual_mov_b32 v2, 0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s2
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1100-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1100-GISEL-NEXT: global_store_b32 v2, v0, s[0:1]
; GFX1100-GISEL-NEXT: s_endpgm
;
; R600-LABEL: s_log_f32:
@@ -358,35 +360,36 @@ define amdgpu_kernel void @s_log_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3f317217
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x3377d1cf
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3f317217
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v2, s6, v2
-; SI-GISEL-NEXT: v_log_f32_e32 v2, v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, s6, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v1, v1
; SI-GISEL-NEXT: s_mov_b32 s6, -1
-; SI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317217, v2
-; SI-GISEL-NEXT: v_fma_f32 v7, v2, v3, -v6
-; SI-GISEL-NEXT: v_fma_f32 v7, v2, v4, v7
-; SI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v5
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[0:1]
+; SI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3f317217, v1
+; SI-GISEL-NEXT: v_fma_f32 v6, v1, v2, -v5
+; SI-GISEL-NEXT: v_fma_f32 v6, v1, v3, v6
+; SI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[0:1]
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, s7, v0
-; SI-GISEL-NEXT: v_log_f32_e32 v1, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, s7, v0
+; SI-GISEL-NEXT: v_log_f32_e32 v5, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x41b17218
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
-; SI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317217, v1
-; SI-GISEL-NEXT: v_fma_f32 v3, v1, v3, -v2
-; SI-GISEL-NEXT: v_fma_f32 v3, v1, v4, v3
-; SI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, v5
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v5
+; SI-GISEL-NEXT: v_fma_f32 v2, v5, v2, -v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v5, v3, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v5|, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[0:1]
; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
@@ -445,42 +448,43 @@ define amdgpu_kernel void @s_log_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24
; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, s6, v2
-; VI-GISEL-NEXT: v_log_f32_e32 v2, v2
-; VI-GISEL-NEXT: v_and_b32_e32 v4, 0xfffff000, v2
-; VI-GISEL-NEXT: v_sub_f32_e32 v5, v2, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v1, s6, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_and_b32_e32 v3, 0xfffff000, v1
+; VI-GISEL-NEXT: v_sub_f32_e32 v4, v1, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3
; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v4
-; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3805fdf4, v5
-; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3f317000, v5
-; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317000, v4
; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v3
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[0:1]
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1]
; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v0
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, s7, v0
-; VI-GISEL-NEXT: v_log_f32_e32 v1, v0
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; VI-GISEL-NEXT: v_ldexp_f32 v0, s7, v0
+; VI-GISEL-NEXT: v_log_f32_e32 v3, v0
; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x41b17218
; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
-; VI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0
-; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v1
-; VI-GISEL-NEXT: v_sub_f32_e32 v5, v1, v2
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v3
+; VI-GISEL-NEXT: v_sub_f32_e32 v5, v3, v1
; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v5
-; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3805fdf4, v1
; VI-GISEL-NEXT: v_add_f32_e32 v6, v7, v6
; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3f317000, v5
; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
-; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v5
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, v3
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v5
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[0:1]
; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
@@ -531,37 +535,38 @@ define amdgpu_kernel void @s_log_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3f317217
; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x3377d1cf
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v0
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s10, v2
-; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v2
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317217, v2
-; GFX900-GISEL-NEXT: v_fma_f32 v7, v2, v3, -v6
-; GFX900-GISEL-NEXT: v_fma_f32 v7, v2, v4, v7
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, s10, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317217, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v7, v1, v3, -v6
+; GFX900-GISEL-NEXT: v_fma_f32 v7, v1, v4, v7
; GFX900-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v5
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[0:1]
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, v5
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[0:1]
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v0
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s11, v0
-; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v0
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v6, 0x41b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317217, v1
-; GFX900-GISEL-NEXT: v_fma_f32 v3, v1, v3, -v2
-; GFX900-GISEL-NEXT: v_fma_f32 v3, v1, v4, v3
-; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, v5
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[0:1]
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, s11, v0
+; GFX900-GISEL-NEXT: v_log_f32_e32 v6, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x41b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v7, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v6
+; GFX900-GISEL-NEXT: v_fma_f32 v3, v6, v3, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v3, v6, v4, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v3
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v6|, v5
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v7, s[0:1]
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
; GFX900-GISEL-NEXT: s_endpgm
;
@@ -608,31 +613,37 @@ define amdgpu_kernel void @s_log_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, s2
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s5, 0x800000, s3
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, s2
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s4
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s5
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s2, v0 :: v_dual_mul_f32 v1, s3, v1
-; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s5
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v1, s3, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_dual_mul_f32 v2, 0x3f317217, v0 :: v_dual_mul_f32 v3, 0x3f317217, v1
+; GFX1100-GISEL-NEXT: v_dual_mul_f32 v3, 0x3f317217, v1 :: v_dual_lshlrev_b32 v0, 5, v0
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s2, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_fma_f32 v5, 0x3f317217, v1, -v3
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_3)
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v5, 0x3377d1cf, v1
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_dual_add_f32 v3, v3, v5 :: v_dual_mul_f32 v2, 0x3f317217, v0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 0x41b17218, s5
; GFX1100-GISEL-NEXT: v_fma_f32 v4, 0x3f317217, v0, -v2
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_fma_f32 v5, 0x3f317217, v1, -v3
-; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v4, 0x3377d1cf, v0 :: v_dual_fmac_f32 v5, 0x3377d1cf, v1
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX1100-GISEL-NEXT: v_dual_add_f32 v2, v2, v4 :: v_dual_add_f32 v3, v3, v5
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v4, 0x3377d1cf, v0
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x41b17218, s4
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 0x41b17218, s5
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1|
-; GFX1100-GISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_cndmask_b32 v1, v1, v3
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_dual_cndmask_b32 v1, v1, v3 :: v_dual_mov_b32 v2, 0
; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v4 :: v_dual_sub_f32 v1, v1, v5
; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX1100-GISEL-NEXT: s_endpgm
@@ -808,49 +819,51 @@ define amdgpu_kernel void @s_log_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
; SI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0xd
; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3f317217
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3f317217
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, s8, v0
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x3377d1cf
-; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000
; SI-GISEL-NEXT: s_mov_b32 s6, -1
-; SI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317217, v0
-; SI-GISEL-NEXT: v_fma_f32 v7, v0, v3, -v6
-; SI-GISEL-NEXT: v_fma_f32 v7, v0, v4, v7
-; SI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v5
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[0:1]
+; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; SI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3f317217, v0
+; SI-GISEL-NEXT: v_fma_f32 v6, v0, v2, -v5
+; SI-GISEL-NEXT: v_fma_f32 v6, v0, v3, v6
+; SI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v5, s[0:1]
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 1.0, v2, s[0:1]
-; SI-GISEL-NEXT: v_mul_f32_e32 v6, s9, v6
-; SI-GISEL-NEXT: v_log_f32_e32 v6, v6
-; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x41b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[0:1]
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v5, 5, v5
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v5, s9, v5
+; SI-GISEL-NEXT: v_log_f32_e32 v5, v5
+; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x41b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8
-; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v6
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, s10, v1
-; SI-GISEL-NEXT: v_fma_f32 v9, v6, v3, -v8
-; SI-GISEL-NEXT: v_log_f32_e32 v2, v1
-; SI-GISEL-NEXT: v_fma_f32 v9, v6, v4, v9
-; SI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v6|, v5
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v6, v8, s[2:3]
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v7, s[0:1]
-; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v6
-; SI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317217, v2
-; SI-GISEL-NEXT: v_fma_f32 v3, v2, v3, -v6
-; SI-GISEL-NEXT: v_fma_f32 v3, v2, v4, v3
-; SI-GISEL-NEXT: v_add_f32_e32 v3, v6, v3
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v5
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v7, vcc
-; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v7
+; SI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3f317217, v5
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_fma_f32 v8, v5, v2, -v7
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_fma_f32 v8, v5, v3, v8
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, s10, v1
+; SI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8
+; SI-GISEL-NEXT: v_log_f32_e32 v8, v1
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v5|, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, v7, s[2:3]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, v6, s[0:1]
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v5
+; SI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3f317217, v8
+; SI-GISEL-NEXT: v_fma_f32 v2, v8, v2, -v5
+; SI-GISEL-NEXT: v_fma_f32 v2, v8, v3, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v2, v5, v2
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v8|, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v6, vcc
; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-GISEL-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8
@@ -927,12 +940,13 @@ define amdgpu_kernel void @s_log_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; VI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; VI-GISEL-NEXT: v_ldexp_f32 v0, s8, v0
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
; VI-GISEL-NEXT: v_and_b32_e32 v3, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v4, v0, v3
@@ -943,45 +957,46 @@ define amdgpu_kernel void @s_log_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v4
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v2
; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[0:1]
; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, v2, s[0:1]
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, s9, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[0:1]
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 5, v3
+; VI-GISEL-NEXT: v_ldexp_f32 v3, s9, v3
; VI-GISEL-NEXT: v_log_f32_e32 v3, v3
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x41b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc
-; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v6
-; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v3
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x41b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v5
+; VI-GISEL-NEXT: v_and_b32_e32 v5, 0xfffff000, v3
+; VI-GISEL-NEXT: v_sub_f32_e32 v6, v3, v5
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v1
-; VI-GISEL-NEXT: v_sub_f32_e32 v7, v3, v6
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v7
-; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x3805fdf4, v6
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, s10, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v8, v9, v8
-; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3f317000, v7
-; VI-GISEL-NEXT: v_log_f32_e32 v2, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8
+; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3805fdf4, v6
+; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v5
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_add_f32_e32 v7, v8, v7
; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317000, v6
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v3|, v4
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, v6, s[2:3]
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v5, s[0:1]
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3f317000, v5
+; VI-GISEL-NEXT: v_ldexp_f32 v1, s10, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
+; VI-GISEL-NEXT: v_log_f32_e32 v6, v1
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v3|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, v5, s[2:3]
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1]
; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
-; VI-GISEL-NEXT: v_and_b32_e32 v3, 0xfffff000, v2
-; VI-GISEL-NEXT: v_sub_f32_e32 v6, v2, v3
-; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3805fdf4, v6
+; VI-GISEL-NEXT: v_and_b32_e32 v3, 0xfffff000, v6
+; VI-GISEL-NEXT: v_sub_f32_e32 v5, v6, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3805fdf4, v5
; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v3
; VI-GISEL-NEXT: v_add_f32_e32 v7, v8, v7
-; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317000, v6
-; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3f317000, v5
+; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v7
; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
-; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v6
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v4
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v5
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v6|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v3, s[0:1]
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
; VI-GISEL-NEXT: v_mov_b32_e32 v3, s4
; VI-GISEL-NEXT: v_mov_b32_e32 v4, s5
@@ -1046,49 +1061,51 @@ define amdgpu_kernel void @s_log_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
; GFX900-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34
; GFX900-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3f317217
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3f317217
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x3377d1cf
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, s8, v0
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x3377d1cf
; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0
; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317217, v0
-; GFX900-GISEL-NEXT: v_fma_f32 v7, v0, v3, -v6
+; GFX900-GISEL-NEXT: v_fma_f32 v7, v0, v2, -v6
; GFX900-GISEL-NEXT: v_fma_f32 v7, v0, v4, v7
; GFX900-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v5
; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[0:1]
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v6, 1.0, v2, s[0:1]
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, s9, v6
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[0:1]
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v6, 5, v6
+; GFX900-GISEL-NEXT: v_ldexp_f32 v6, s9, v6
; GFX900-GISEL-NEXT: v_log_f32_e32 v6, v6
; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x41b17218
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8
; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v6
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s10, v1
-; GFX900-GISEL-NEXT: v_fma_f32 v9, v6, v3, -v8
-; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_fma_f32 v9, v6, v2, -v8
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GFX900-GISEL-NEXT: v_fma_f32 v9, v6, v4, v9
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, s10, v1
; GFX900-GISEL-NEXT: v_add_f32_e32 v8, v8, v9
+; GFX900-GISEL-NEXT: v_log_f32_e32 v9, v1
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v6|, v5
; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v6, v8, s[2:3]
; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v7, s[0:1]
; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v6
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317217, v2
-; GFX900-GISEL-NEXT: v_fma_f32 v3, v2, v3, -v6
-; GFX900-GISEL-NEXT: v_fma_f32 v3, v2, v4, v3
-; GFX900-GISEL-NEXT: v_add_f32_e32 v3, v6, v3
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v5
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v7, vcc
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317217, v9
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v9, v2, -v6
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v9, v4, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v6, v2
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v9|, v5
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v9, v2, s[0:1]
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v7, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v4
; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7]
; GFX900-GISEL-NEXT: s_endpgm
;
@@ -1156,49 +1173,55 @@ define amdgpu_kernel void @s_log_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s2
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s6, 0x800000, s1
-; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s2
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s3
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s6
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s7
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 0x41b17218, s6
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s7
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s3
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s6
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 0x41b17218, s3
-; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s0, v0 :: v_dual_mul_f32 v1, s1, v1
-; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 0x41b17218, s6
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v2, s2, v2
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_dual_mul_f32 v5, 0x3f317217, v2 :: v_dual_lshlrev_b32 v0, 5, v0
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s0, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_fma_f32 v8, 0x3f317217, v2, -v5
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v2, s2, v2
-; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v8, 0x3377d1cf, v2
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v5, v5, v8
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
-; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317217, v1
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_3)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fma_f32 v6, 0x3f317217, v0, -v3
+; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v6, 0x3377d1cf, v0 :: v_dual_lshlrev_b32 v1, 5, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v1, s1, v1
+; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v3, v3, v6
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 0x41b17218, s7
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v5, 0x3f317217, v2
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317217, v1
; GFX1100-GISEL-NEXT: v_fma_f32 v7, 0x3f317217, v1, -v4
-; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v6, 0x3377d1cf, v0
-; GFX1100-GISEL-NEXT: v_fma_f32 v8, 0x3f317217, v2, -v5
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v7, 0x3377d1cf, v1
-; GFX1100-GISEL-NEXT: v_add_f32_e32 v3, v3, v6
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 0x41b17218, s7
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1100-GISEL-NEXT: v_add_f32_e32 v4, v4, v7
; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1|
-; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v8, 0x3377d1cf, v2 :: v_dual_mov_b32 v3, 0
+; GFX1100-GISEL-NEXT: v_mov_b32_e32 v3, 0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2|
-; GFX1100-GISEL-NEXT: v_sub_f32_e32 v1, v1, v10
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_dual_add_f32 v5, v5, v8 :: v_dual_sub_f32 v0, v0, v9
+; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v9 :: v_dual_sub_f32 v1, v1, v10
; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v2, v2, v6
@@ -1433,62 +1456,65 @@ define amdgpu_kernel void @s_log_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
; SI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0xd
; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x3f317217
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3f317217
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x3377d1cf
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, s8, v0
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x3377d1cf
-; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x7f800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
; SI-GISEL-NEXT: s_mov_b32 s6, -1
+; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
-; SI-GISEL-NEXT: v_fma_f32 v7, v0, v4, -v1
-; SI-GISEL-NEXT: v_fma_f32 v7, v0, v5, v7
-; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v7
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v6
+; SI-GISEL-NEXT: v_fma_f32 v6, v0, v3, -v1
+; SI-GISEL-NEXT: v_fma_f32 v6, v0, v4, v6
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v6
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v5
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[0:1]
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, s9, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, s9, v1
; SI-GISEL-NEXT: v_log_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x41b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
-; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8
-; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v1
-; SI-GISEL-NEXT: v_fma_f32 v9, v1, v4, -v8
-; SI-GISEL-NEXT: v_fma_f32 v9, v1, v5, v9
+; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x41b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v7
+; SI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3f317217, v1
+; SI-GISEL-NEXT: v_fma_f32 v8, v1, v3, -v7
+; SI-GISEL-NEXT: v_fma_f32 v8, v1, v4, v8
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2
-; SI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v9, 1.0, v3, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v9, s10, v9
-; SI-GISEL-NEXT: v_log_f32_e32 v9, v9
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, v6
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[2:3]
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v7, s[0:1]
+; SI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v8, 5, v8
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v8, s10, v8
+; SI-GISEL-NEXT: v_log_f32_e32 v8, v8
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, v5
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[2:3]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v7, 0, v6, s[0:1]
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1]
-; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v8
-; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v9
-; SI-GISEL-NEXT: v_mul_f32_e32 v2, s11, v2
-; SI-GISEL-NEXT: v_fma_f32 v10, v9, v4, -v8
-; SI-GISEL-NEXT: v_log_f32_e32 v3, v2
-; SI-GISEL-NEXT: v_fma_f32 v10, v9, v5, v10
-; SI-GISEL-NEXT: v_add_f32_e32 v8, v8, v10
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v9|, v6
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v9, v8, s[2:3]
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
-; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v8
-; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v3
-; SI-GISEL-NEXT: v_fma_f32 v4, v3, v4, -v8
-; SI-GISEL-NEXT: v_fma_f32 v4, v3, v5, v4
-; SI-GISEL-NEXT: v_add_f32_e32 v4, v8, v4
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v6
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[0:1]
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v7
+; SI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3f317217, v8
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
+; SI-GISEL-NEXT: v_fma_f32 v9, v8, v3, -v7
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; SI-GISEL-NEXT: v_fma_f32 v9, v8, v4, v9
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, s11, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v7, v7, v9
+; SI-GISEL-NEXT: v_log_f32_e32 v9, v2
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v8|, v5
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v8, v7, s[2:3]
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v7
+; SI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3f317217, v9
+; SI-GISEL-NEXT: v_fma_f32 v3, v9, v3, -v7
+; SI-GISEL-NEXT: v_fma_f32 v3, v9, v4, v3
+; SI-GISEL-NEXT: v_add_f32_e32 v3, v7, v3
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v9|, v5
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v6, s[0:1]
; SI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4
-; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; SI-GISEL-NEXT: s_endpgm
;
@@ -1581,12 +1607,13 @@ define amdgpu_kernel void @s_log_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; VI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; VI-GISEL-NEXT: v_ldexp_f32 v0, s8, v0
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v4, v0, v1
@@ -1597,62 +1624,64 @@ define amdgpu_kernel void @s_log_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v4
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v3
; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[0:1]
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, s9, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v1, s9, v1
; VI-GISEL-NEXT: v_log_f32_e32 v1, v1
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x41b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc
-; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v6
-; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v1
-; VI-GISEL-NEXT: v_sub_f32_e32 v7, v1, v6
-; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v7
-; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x3805fdf4, v6
-; VI-GISEL-NEXT: v_add_f32_e32 v8, v9, v8
-; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3f317000, v7
-; VI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x41b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v5
+; VI-GISEL-NEXT: v_and_b32_e32 v5, 0xfffff000, v1
+; VI-GISEL-NEXT: v_sub_f32_e32 v6, v1, v5
+; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3805fdf4, v6
+; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v5
+; VI-GISEL-NEXT: v_add_f32_e32 v7, v8, v7
; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317000, v6
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2
; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v7, 1.0, v3, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v7, s10, v7
-; VI-GISEL-NEXT: v_log_f32_e32 v7, v7
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, v4
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[2:3]
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v5, s[0:1]
-; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v6
-; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v7
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3f317000, v5
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 5, v6
+; VI-GISEL-NEXT: v_ldexp_f32 v6, s10, v6
+; VI-GISEL-NEXT: v_log_f32_e32 v6, v6
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[2:3]
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, v4, s[0:1]
+; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v5
+; VI-GISEL-NEXT: v_and_b32_e32 v5, 0xfffff000, v6
+; VI-GISEL-NEXT: v_sub_f32_e32 v7, v6, v5
; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2
-; VI-GISEL-NEXT: v_sub_f32_e32 v8, v7, v6
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1]
-; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x3805fdf4, v8
-; VI-GISEL-NEXT: v_mul_f32_e32 v10, 0x3805fdf4, v6
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, s11, v2
-; VI-GISEL-NEXT: v_add_f32_e32 v9, v10, v9
-; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317000, v8
-; VI-GISEL-NEXT: v_log_f32_e32 v3, v2
-; VI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9
-; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317000, v6
-; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v8
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v7|, v4
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v7, v6, s[2:3]
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc
-; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v6
-; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v3
-; VI-GISEL-NEXT: v_sub_f32_e32 v7, v3, v6
; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v7
-; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x3805fdf4, v6
+; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x3805fdf4, v5
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
; VI-GISEL-NEXT: v_add_f32_e32 v8, v9, v8
; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3f317000, v7
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; VI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3f317000, v5
+; VI-GISEL-NEXT: v_ldexp_f32 v2, s11, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v7
+; VI-GISEL-NEXT: v_log_f32_e32 v7, v2
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v6|, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v5, s[2:3]
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v5
+; VI-GISEL-NEXT: v_and_b32_e32 v5, 0xfffff000, v7
+; VI-GISEL-NEXT: v_sub_f32_e32 v6, v7, v5
+; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v6
+; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x3805fdf4, v5
+; VI-GISEL-NEXT: v_add_f32_e32 v8, v9, v8
; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317000, v6
-; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1]
+; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v8
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3f317000, v5
+; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v7|, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v5, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1]
; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4
; VI-GISEL-NEXT: v_mov_b32_e32 v4, s4
; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5
@@ -1730,61 +1759,64 @@ define amdgpu_kernel void @s_log_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
; GFX900-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34
; GFX900-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x3f317217
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3f317217
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x3377d1cf
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, s8, v0
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x3377d1cf
; GFX900-GISEL-NEXT: v_mov_b32_e32 v6, 0x7f800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0
; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
-; GFX900-GISEL-NEXT: v_fma_f32 v7, v0, v4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v7, v0, v3, -v1
; GFX900-GISEL-NEXT: v_fma_f32 v7, v0, v5, v7
; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v7
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v6
; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[0:1]
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s9, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, s9, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x41b17218
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8
; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v1
-; GFX900-GISEL-NEXT: v_fma_f32 v9, v1, v4, -v8
+; GFX900-GISEL-NEXT: v_fma_f32 v9, v1, v3, -v8
; GFX900-GISEL-NEXT: v_fma_f32 v9, v1, v5, v9
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2
; GFX900-GISEL-NEXT: v_add_f32_e32 v8, v8, v9
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v9, 1.0, v3, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v9, s10, v9
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v9, 5, v9
+; GFX900-GISEL-NEXT: v_ldexp_f32 v9, s10, v9
; GFX900-GISEL-NEXT: v_log_f32_e32 v9, v9
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, v6
; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[2:3]
; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v7, s[0:1]
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1]
; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v8
; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v9
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s11, v2
-; GFX900-GISEL-NEXT: v_fma_f32 v10, v9, v4, -v8
-; GFX900-GISEL-NEXT: v_log_f32_e32 v3, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
+; GFX900-GISEL-NEXT: v_fma_f32 v10, v9, v3, -v8
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX900-GISEL-NEXT: v_fma_f32 v10, v9, v5, v10
+; GFX900-GISEL-NEXT: v_ldexp_f32 v2, s11, v2
; GFX900-GISEL-NEXT: v_add_f32_e32 v8, v8, v10
+; GFX900-GISEL-NEXT: v_log_f32_e32 v10, v2
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v9|, v6
; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v9, v8, s[2:3]
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v8
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v3
-; GFX900-GISEL-NEXT: v_fma_f32 v4, v3, v4, -v8
-; GFX900-GISEL-NEXT: v_fma_f32 v4, v3, v5, v4
-; GFX900-GISEL-NEXT: v_add_f32_e32 v4, v8, v4
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v6
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[0:1]
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v10
+; GFX900-GISEL-NEXT: v_fma_f32 v3, v10, v3, -v8
+; GFX900-GISEL-NEXT: v_fma_f32 v3, v10, v5, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v3, v8, v3
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v10|, v6
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, v7, s[0:1]
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v3, v3, v5
; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]
; GFX900-GISEL-NEXT: s_endpgm
;
@@ -1860,60 +1892,67 @@ define amdgpu_kernel void @s_log_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s6, 0x800000, s0
-; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s1
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s8, 0x800000, s2
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s9, 0x800000, s3
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s6, 0x800000, s0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s1
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s6
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s7
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s8
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s9
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s8
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x4f800000, s9
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s6
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s7
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x41b17218, s6
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s0, v0 :: v_dual_mul_f32 v1, s1, v1
-; GFX1100-GISEL-NEXT: v_dual_mul_f32 v2, s2, v2 :: v_dual_mul_f32 v3, s3, v3
-; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(TRANS32_DEP_3)
-; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2
-; GFX1100-GISEL-NEXT: v_log_f32_e32 v3, v3
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v3, 5, v3
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 0x41b17218, s7
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 0x41b17218, s8
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 0x41b17218, s9
-; GFX1100-GISEL-NEXT: v_dual_mul_f32 v5, 0x3f317217, v0 :: v_dual_mul_f32 v6, 0x3f317217, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v2, s2, v2
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v3, s3, v3
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v3, v3
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v7, 0x3f317217, v2
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s0, v0
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v3
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v1, s1, v1
+; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1100-GISEL-NEXT: v_fma_f32 v12, 0x3f317217, v2, -v7
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT: v_fma_f32 v13, 0x3f317217, v3, -v8
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v12, 0x3377d1cf, v2 :: v_dual_fmac_f32 v13, 0x3377d1cf, v3
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v7, v7, v12
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_dual_mul_f32 v7, 0x3f317217, v2 :: v_dual_mul_f32 v8, 0x3f317217, v3
+; GFX1100-GISEL-NEXT: v_dual_mul_f32 v5, 0x3f317217, v0 :: v_dual_add_f32 v8, v8, v13
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317217, v1
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1100-GISEL-NEXT: v_fma_f32 v10, 0x3f317217, v0, -v5
; GFX1100-GISEL-NEXT: v_fma_f32 v11, 0x3f317217, v1, -v6
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1100-GISEL-NEXT: v_fma_f32 v12, 0x3f317217, v2, -v7
-; GFX1100-GISEL-NEXT: v_fma_f32 v13, 0x3f317217, v3, -v8
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v10, 0x3377d1cf, v0 :: v_dual_fmac_f32 v11, 0x3377d1cf, v1
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v12, 0x3377d1cf, v2 :: v_dual_fmac_f32 v13, 0x3377d1cf, v3
; GFX1100-GISEL-NEXT: v_dual_add_f32 v5, v5, v10 :: v_dual_add_f32 v6, v6, v11
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT: v_dual_add_f32 v7, v7, v12 :: v_dual_add_f32 v8, v8, v13
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1|
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc_lo
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2|
-; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_sub_f32 v1, v1, v9
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_3)
+; GFX1100-GISEL-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_sub_f32 v0, v0, v4
; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc_lo
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v3|
-; GFX1100-GISEL-NEXT: v_dual_cndmask_b32 v3, v3, v8 :: v_dual_sub_f32 v2, v2, v14
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc_lo
+; GFX1100-GISEL-NEXT: v_dual_sub_f32 v1, v1, v9 :: v_dual_sub_f32 v2, v2, v14
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v3, v3, v15
; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1100-GISEL-NEXT: global_store_b128 v4, v[0:3], s[0:1]
+; GFX1100-GISEL-NEXT: global_store_b128 v5, v[0:3], s[0:1]
; GFX1100-GISEL-NEXT: s_endpgm
;
; R600-LABEL: s_log_v4f32:
@@ -2126,10 +2165,10 @@ define float @v_log_f32(float %in) {
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
@@ -2175,16 +2214,16 @@ define float @v_log_f32(float %in) {
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
@@ -2224,10 +2263,10 @@ define float @v_log_f32(float %in) {
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
@@ -2270,21 +2309,22 @@ define float @v_log_f32(float %in) {
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -2329,10 +2369,10 @@ define float @v_log_fabs_f32(float %in) {
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; SI-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e64 v0, |v0|, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
@@ -2378,16 +2418,16 @@ define float @v_log_fabs_f32(float %in) {
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; VI-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, |v0|, v1
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
@@ -2427,10 +2467,10 @@ define float @v_log_fabs_f32(float %in) {
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, |v0|, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
@@ -2475,20 +2515,22 @@ define float @v_log_fabs_f32(float %in) {
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
-; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, |v0|, v1
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -2534,10 +2576,10 @@ define float @v_log_fneg_fabs_f32(float %in) {
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; SI-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e64 v0, -|v0|, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
@@ -2583,16 +2625,16 @@ define float @v_log_fneg_fabs_f32(float %in) {
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; VI-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, -|v0|, v1
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
@@ -2632,10 +2674,10 @@ define float @v_log_fneg_fabs_f32(float %in) {
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, -|v0|, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
@@ -2680,20 +2722,22 @@ define float @v_log_fneg_fabs_f32(float %in) {
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, -|v0|
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
-; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, -|v0|, v1
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -2740,10 +2784,10 @@ define float @v_log_fneg_f32(float %in) {
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; SI-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e64 v0, -v0, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
@@ -2789,16 +2833,16 @@ define float @v_log_fneg_f32(float %in) {
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; VI-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, -v0, v1
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
@@ -2838,10 +2882,10 @@ define float @v_log_fneg_f32(float %in) {
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, -v0, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
@@ -2885,20 +2929,22 @@ define float @v_log_fneg_f32(float %in) {
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, -v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
-; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, -v0, v1
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -3304,10 +3350,10 @@ define float @v_log_f32_ninf(float %in) {
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
@@ -3353,16 +3399,16 @@ define float @v_log_f32_ninf(float %in) {
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
@@ -3402,10 +3448,10 @@ define float @v_log_f32_ninf(float %in) {
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
@@ -3448,21 +3494,22 @@ define float @v_log_f32_ninf(float %in) {
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -4038,10 +4085,10 @@ define float @v_log_f32_nnan(float %in) {
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
@@ -4087,16 +4134,16 @@ define float @v_log_f32_nnan(float %in) {
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
@@ -4136,10 +4183,10 @@ define float @v_log_f32_nnan(float %in) {
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
@@ -4182,21 +4229,22 @@ define float @v_log_f32_nnan(float %in) {
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -4381,10 +4429,10 @@ define float @v_log_f32_nnan_dynamic(float %in) #1 {
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
@@ -4430,16 +4478,16 @@ define float @v_log_f32_nnan_dynamic(float %in) #1 {
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
@@ -4479,10 +4527,10 @@ define float @v_log_f32_nnan_dynamic(float %in) #1 {
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
@@ -4525,21 +4573,22 @@ define float @v_log_f32_nnan_dynamic(float %in) #1 {
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -4724,10 +4773,10 @@ define float @v_log_f32_ninf_dynamic(float %in) #1 {
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
@@ -4773,16 +4822,16 @@ define float @v_log_f32_ninf_dynamic(float %in) #1 {
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
@@ -4822,10 +4871,10 @@ define float @v_log_f32_ninf_dynamic(float %in) #1 {
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
@@ -4868,21 +4917,22 @@ define float @v_log_f32_ninf_dynamic(float %in) #1 {
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -4924,10 +4974,10 @@ define float @v_log_f32_nnan_ninf(float %in) {
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
@@ -4967,16 +5017,16 @@ define float @v_log_f32_nnan_ninf(float %in) {
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v0
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v0
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317000, v0
; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
@@ -5010,10 +5060,10 @@ define float @v_log_f32_nnan_ninf(float %in) {
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
@@ -5051,18 +5101,20 @@ define float @v_log_f32_nnan_ninf(float %in) {
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_add_f32_e32 v0, v1, v2
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -5207,10 +5259,10 @@ define float @v_log_f32_nnan_ninf_dynamic(float %in) #1 {
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
@@ -5250,16 +5302,16 @@ define float @v_log_f32_nnan_ninf_dynamic(float %in) #1 {
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v0
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v0
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317000, v0
; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
@@ -5293,10 +5345,10 @@ define float @v_log_f32_nnan_ninf_dynamic(float %in) #1 {
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
@@ -5334,18 +5386,20 @@ define float @v_log_f32_nnan_ninf_dynamic(float %in) #1 {
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_add_f32_e32 v0, v1, v2
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -5419,10 +5473,10 @@ define float @v_log_f32_dynamic_mode(float %in) #1 {
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
@@ -5468,16 +5522,16 @@ define float @v_log_f32_dynamic_mode(float %in) #1 {
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
@@ -5517,10 +5571,10 @@ define float @v_log_f32_dynamic_mode(float %in) #1 {
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
@@ -5563,21 +5617,22 @@ define float @v_log_f32_dynamic_mode(float %in) #1 {
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
index fd50d1b60fbd10..d09df75837339c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
@@ -45,16 +45,17 @@ define amdgpu_kernel void @s_log10_f32(ptr addrspace(1) %out, float %in) {
; SI-GISEL-NEXT: s_load_dword s0, s[4:5], 0xb
; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, s0, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, s0, v0
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; SI-GISEL-NEXT: s_mov_b32 s6, -1
+; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
; SI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v0
; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v4
; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
@@ -64,7 +65,6 @@ define amdgpu_kernel void @s_log10_f32(ptr addrspace(1) %out, float %in) {
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-GISEL-NEXT: s_endpgm
;
@@ -104,25 +104,25 @@ define amdgpu_kernel void @s_log10_f32(ptr addrspace(1) %out, float %in) {
; VI-GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c
; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24
; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, s0, v0
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; VI-GISEL-NEXT: v_ldexp_f32 v0, s0, v0
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
-; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2
; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3
+; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2
; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
-; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v1
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1]
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
@@ -162,25 +162,25 @@ define amdgpu_kernel void @s_log10_f32(ptr addrspace(1) %out, float %in) {
; GFX900-GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c
; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24
; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e9a209a
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s0, v0
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, s0, v0
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v0
-; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v4
-; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
-; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v4, v1
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v3
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v5, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v2, -v5
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v5, v2
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v4
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1]
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x411a209b
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[2:3]
; GFX900-GISEL-NEXT: s_endpgm
;
@@ -218,24 +218,26 @@ define amdgpu_kernel void @s_log10_f32(ptr addrspace(1) %out, float %in) {
; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s2, 0x800000, s0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s2
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, s0, v0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s0, v0
; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
-; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 0x411a209b, s2
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_dual_cndmask_b32 v0, v0, v1 :: v_dual_mov_b32 v1, 0
-; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_dual_add_f32 v1, v1, v2 :: v_dual_mov_b32 v2, 0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, s2
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1100-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1100-GISEL-NEXT: global_store_b32 v2, v0, s[0:1]
; GFX1100-GISEL-NEXT: s_endpgm
;
; R600-LABEL: s_log10_f32:
@@ -358,35 +360,36 @@ define amdgpu_kernel void @s_log10_v2f32(ptr addrspace(1) %out, <2 x float> %in)
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3e9a209a
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x3284fbcf
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e9a209a
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v2, s6, v2
-; SI-GISEL-NEXT: v_log_f32_e32 v2, v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, s6, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v1, v1
; SI-GISEL-NEXT: s_mov_b32 s6, -1
-; SI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a209a, v2
-; SI-GISEL-NEXT: v_fma_f32 v7, v2, v3, -v6
-; SI-GISEL-NEXT: v_fma_f32 v7, v2, v4, v7
-; SI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v5
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[0:1]
+; SI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3e9a209a, v1
+; SI-GISEL-NEXT: v_fma_f32 v6, v1, v2, -v5
+; SI-GISEL-NEXT: v_fma_f32 v6, v1, v3, v6
+; SI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[0:1]
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, s7, v0
-; SI-GISEL-NEXT: v_log_f32_e32 v1, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, s7, v0
+; SI-GISEL-NEXT: v_log_f32_e32 v5, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x411a209b
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
-; SI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v1
-; SI-GISEL-NEXT: v_fma_f32 v3, v1, v3, -v2
-; SI-GISEL-NEXT: v_fma_f32 v3, v1, v4, v3
-; SI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, v5
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v5
+; SI-GISEL-NEXT: v_fma_f32 v2, v5, v2, -v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v5, v3, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v5|, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[0:1]
; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
@@ -445,42 +448,43 @@ define amdgpu_kernel void @s_log10_v2f32(ptr addrspace(1) %out, <2 x float> %in)
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24
; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, s6, v2
-; VI-GISEL-NEXT: v_log_f32_e32 v2, v2
-; VI-GISEL-NEXT: v_and_b32_e32 v4, 0xfffff000, v2
-; VI-GISEL-NEXT: v_sub_f32_e32 v5, v2, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v1, s6, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_and_b32_e32 v3, 0xfffff000, v1
+; VI-GISEL-NEXT: v_sub_f32_e32 v4, v1, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v3
; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x369a84fb, v4
-; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x369a84fb, v5
-; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3e9a2000, v5
-; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a2000, v4
; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3
; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v3
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[0:1]
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1]
; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v0
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, s7, v0
-; VI-GISEL-NEXT: v_log_f32_e32 v1, v0
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; VI-GISEL-NEXT: v_ldexp_f32 v0, s7, v0
+; VI-GISEL-NEXT: v_log_f32_e32 v3, v0
; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x411a209b
; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
-; VI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0
-; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v1
-; VI-GISEL-NEXT: v_sub_f32_e32 v5, v1, v2
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v3
+; VI-GISEL-NEXT: v_sub_f32_e32 v5, v3, v1
; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x369a84fb, v5
-; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x369a84fb, v1
; VI-GISEL-NEXT: v_add_f32_e32 v6, v7, v6
; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3e9a2000, v5
; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2
-; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v5
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, v3
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v5
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[0:1]
; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
@@ -531,37 +535,38 @@ define amdgpu_kernel void @s_log10_v2f32(ptr addrspace(1) %out, <2 x float> %in)
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3e9a209a
; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x3284fbcf
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v0
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s10, v2
-; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v2
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a209a, v2
-; GFX900-GISEL-NEXT: v_fma_f32 v7, v2, v3, -v6
-; GFX900-GISEL-NEXT: v_fma_f32 v7, v2, v4, v7
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, s10, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a209a, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v7, v1, v3, -v6
+; GFX900-GISEL-NEXT: v_fma_f32 v7, v1, v4, v7
; GFX900-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v5
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[0:1]
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, v5
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[0:1]
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v0
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s11, v0
-; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v0
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v6, 0x411a209b
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v1
-; GFX900-GISEL-NEXT: v_fma_f32 v3, v1, v3, -v2
-; GFX900-GISEL-NEXT: v_fma_f32 v3, v1, v4, v3
-; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, v5
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[0:1]
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, s11, v0
+; GFX900-GISEL-NEXT: v_log_f32_e32 v6, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x411a209b
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v7, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v6
+; GFX900-GISEL-NEXT: v_fma_f32 v3, v6, v3, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v3, v6, v4, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v3
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v6|, v5
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v7, s[0:1]
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
; GFX900-GISEL-NEXT: s_endpgm
;
@@ -608,31 +613,37 @@ define amdgpu_kernel void @s_log10_v2f32(ptr addrspace(1) %out, <2 x float> %in)
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, s2
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s5, 0x800000, s3
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, s2
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s4
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s5
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s2, v0 :: v_dual_mul_f32 v1, s3, v1
-; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s5
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v1, s3, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_dual_mul_f32 v2, 0x3e9a209a, v0 :: v_dual_mul_f32 v3, 0x3e9a209a, v1
+; GFX1100-GISEL-NEXT: v_dual_mul_f32 v3, 0x3e9a209a, v1 :: v_dual_lshlrev_b32 v0, 5, v0
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s2, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_fma_f32 v5, 0x3e9a209a, v1, -v3
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_3)
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v5, 0x3284fbcf, v1
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_dual_add_f32 v3, v3, v5 :: v_dual_mul_f32 v2, 0x3e9a209a, v0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 0x411a209b, s5
; GFX1100-GISEL-NEXT: v_fma_f32 v4, 0x3e9a209a, v0, -v2
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_fma_f32 v5, 0x3e9a209a, v1, -v3
-; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v4, 0x3284fbcf, v0 :: v_dual_fmac_f32 v5, 0x3284fbcf, v1
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX1100-GISEL-NEXT: v_dual_add_f32 v2, v2, v4 :: v_dual_add_f32 v3, v3, v5
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v4, 0x3284fbcf, v0
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x411a209b, s4
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 0x411a209b, s5
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1|
-; GFX1100-GISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_cndmask_b32 v1, v1, v3
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_dual_cndmask_b32 v1, v1, v3 :: v_dual_mov_b32 v2, 0
; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v4 :: v_dual_sub_f32 v1, v1, v5
; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX1100-GISEL-NEXT: s_endpgm
@@ -808,49 +819,51 @@ define amdgpu_kernel void @s_log10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
; SI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0xd
; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3e9a209a
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e9a209a
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, s8, v0
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x3284fbcf
-; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000
; SI-GISEL-NEXT: s_mov_b32 s6, -1
-; SI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a209a, v0
-; SI-GISEL-NEXT: v_fma_f32 v7, v0, v3, -v6
-; SI-GISEL-NEXT: v_fma_f32 v7, v0, v4, v7
-; SI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v5
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[0:1]
+; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; SI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_fma_f32 v6, v0, v2, -v5
+; SI-GISEL-NEXT: v_fma_f32 v6, v0, v3, v6
+; SI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v5, s[0:1]
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 1.0, v2, s[0:1]
-; SI-GISEL-NEXT: v_mul_f32_e32 v6, s9, v6
-; SI-GISEL-NEXT: v_log_f32_e32 v6, v6
-; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x411a209b
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[0:1]
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v5, 5, v5
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v5, s9, v5
+; SI-GISEL-NEXT: v_log_f32_e32 v5, v5
+; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x411a209b
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8
-; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v6
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, s10, v1
-; SI-GISEL-NEXT: v_fma_f32 v9, v6, v3, -v8
-; SI-GISEL-NEXT: v_log_f32_e32 v2, v1
-; SI-GISEL-NEXT: v_fma_f32 v9, v6, v4, v9
-; SI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v6|, v5
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v6, v8, s[2:3]
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v7, s[0:1]
-; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v6
-; SI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a209a, v2
-; SI-GISEL-NEXT: v_fma_f32 v3, v2, v3, -v6
-; SI-GISEL-NEXT: v_fma_f32 v3, v2, v4, v3
-; SI-GISEL-NEXT: v_add_f32_e32 v3, v6, v3
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v5
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v7, vcc
-; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v7
+; SI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3e9a209a, v5
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_fma_f32 v8, v5, v2, -v7
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_fma_f32 v8, v5, v3, v8
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, s10, v1
+; SI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8
+; SI-GISEL-NEXT: v_log_f32_e32 v8, v1
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v5|, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, v7, s[2:3]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, v6, s[0:1]
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v5
+; SI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3e9a209a, v8
+; SI-GISEL-NEXT: v_fma_f32 v2, v8, v2, -v5
+; SI-GISEL-NEXT: v_fma_f32 v2, v8, v3, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v2, v5, v2
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v8|, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v6, vcc
; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-GISEL-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8
@@ -927,12 +940,13 @@ define amdgpu_kernel void @s_log10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; VI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; VI-GISEL-NEXT: v_ldexp_f32 v0, s8, v0
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
; VI-GISEL-NEXT: v_and_b32_e32 v3, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v4, v0, v3
@@ -943,45 +957,46 @@ define amdgpu_kernel void @s_log10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3
; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v4
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v2
; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[0:1]
; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, v2, s[0:1]
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, s9, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[0:1]
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 5, v3
+; VI-GISEL-NEXT: v_ldexp_f32 v3, s9, v3
; VI-GISEL-NEXT: v_log_f32_e32 v3, v3
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x411a209b
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc
-; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v6
-; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v3
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x411a209b
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v5
+; VI-GISEL-NEXT: v_and_b32_e32 v5, 0xfffff000, v3
+; VI-GISEL-NEXT: v_sub_f32_e32 v6, v3, v5
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v1
-; VI-GISEL-NEXT: v_sub_f32_e32 v7, v3, v6
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v7
-; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x369a84fb, v6
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, s10, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v8, v9, v8
-; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3e9a2000, v7
-; VI-GISEL-NEXT: v_log_f32_e32 v2, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8
+; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x369a84fb, v6
+; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v5
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_add_f32_e32 v7, v8, v7
; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v6
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v3|, v4
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, v6, s[2:3]
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v5, s[0:1]
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3e9a2000, v5
+; VI-GISEL-NEXT: v_ldexp_f32 v1, s10, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
+; VI-GISEL-NEXT: v_log_f32_e32 v6, v1
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v3|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, v5, s[2:3]
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1]
; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
-; VI-GISEL-NEXT: v_and_b32_e32 v3, 0xfffff000, v2
-; VI-GISEL-NEXT: v_sub_f32_e32 v6, v2, v3
-; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x369a84fb, v6
+; VI-GISEL-NEXT: v_and_b32_e32 v3, 0xfffff000, v6
+; VI-GISEL-NEXT: v_sub_f32_e32 v5, v6, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x369a84fb, v5
; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v3
; VI-GISEL-NEXT: v_add_f32_e32 v7, v8, v7
-; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v6
-; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3e9a2000, v5
+; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v7
; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3
-; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v6
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v4
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v5
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v6|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v3, s[0:1]
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
; VI-GISEL-NEXT: v_mov_b32_e32 v3, s4
; VI-GISEL-NEXT: v_mov_b32_e32 v4, s5
@@ -1046,49 +1061,51 @@ define amdgpu_kernel void @s_log10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
; GFX900-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34
; GFX900-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3e9a209a
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e9a209a
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x3284fbcf
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, s8, v0
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x3284fbcf
; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0
; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a209a, v0
-; GFX900-GISEL-NEXT: v_fma_f32 v7, v0, v3, -v6
+; GFX900-GISEL-NEXT: v_fma_f32 v7, v0, v2, -v6
; GFX900-GISEL-NEXT: v_fma_f32 v7, v0, v4, v7
; GFX900-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v5
; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[0:1]
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v6, 1.0, v2, s[0:1]
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, s9, v6
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[0:1]
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v6, 5, v6
+; GFX900-GISEL-NEXT: v_ldexp_f32 v6, s9, v6
; GFX900-GISEL-NEXT: v_log_f32_e32 v6, v6
; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x411a209b
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8
; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v6
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s10, v1
-; GFX900-GISEL-NEXT: v_fma_f32 v9, v6, v3, -v8
-; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_fma_f32 v9, v6, v2, -v8
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GFX900-GISEL-NEXT: v_fma_f32 v9, v6, v4, v9
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, s10, v1
; GFX900-GISEL-NEXT: v_add_f32_e32 v8, v8, v9
+; GFX900-GISEL-NEXT: v_log_f32_e32 v9, v1
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v6|, v5
; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v6, v8, s[2:3]
; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v7, s[0:1]
; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v6
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a209a, v2
-; GFX900-GISEL-NEXT: v_fma_f32 v3, v2, v3, -v6
-; GFX900-GISEL-NEXT: v_fma_f32 v3, v2, v4, v3
-; GFX900-GISEL-NEXT: v_add_f32_e32 v3, v6, v3
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v5
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1]
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v7, vcc
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a209a, v9
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v9, v2, -v6
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v9, v4, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v6, v2
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v9|, v5
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v9, v2, s[0:1]
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v7, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v4
; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7]
; GFX900-GISEL-NEXT: s_endpgm
;
@@ -1156,49 +1173,55 @@ define amdgpu_kernel void @s_log10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s2
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s6, 0x800000, s1
-; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s2
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s3
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s6
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s7
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 0x411a209b, s6
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s7
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s3
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s6
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 0x411a209b, s3
-; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s0, v0 :: v_dual_mul_f32 v1, s1, v1
-; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 0x411a209b, s6
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v2, s2, v2
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_dual_mul_f32 v5, 0x3e9a209a, v2 :: v_dual_lshlrev_b32 v0, 5, v0
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s0, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_fma_f32 v8, 0x3e9a209a, v2, -v5
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v2, s2, v2
-; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v8, 0x3284fbcf, v2
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v5, v5, v8
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0
-; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v1
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_3)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fma_f32 v6, 0x3e9a209a, v0, -v3
+; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v6, 0x3284fbcf, v0 :: v_dual_lshlrev_b32 v1, 5, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v1, s1, v1
+; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v3, v3, v6
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 0x411a209b, s7
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v5, 0x3e9a209a, v2
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v1
; GFX1100-GISEL-NEXT: v_fma_f32 v7, 0x3e9a209a, v1, -v4
-; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v6, 0x3284fbcf, v0
-; GFX1100-GISEL-NEXT: v_fma_f32 v8, 0x3e9a209a, v2, -v5
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v7, 0x3284fbcf, v1
-; GFX1100-GISEL-NEXT: v_add_f32_e32 v3, v3, v6
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 0x411a209b, s7
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1100-GISEL-NEXT: v_add_f32_e32 v4, v4, v7
; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1|
-; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v8, 0x3284fbcf, v2 :: v_dual_mov_b32 v3, 0
+; GFX1100-GISEL-NEXT: v_mov_b32_e32 v3, 0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2|
-; GFX1100-GISEL-NEXT: v_sub_f32_e32 v1, v1, v10
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_dual_add_f32 v5, v5, v8 :: v_dual_sub_f32 v0, v0, v9
+; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v9 :: v_dual_sub_f32 v1, v1, v10
; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v2, v2, v6
@@ -1433,62 +1456,65 @@ define amdgpu_kernel void @s_log10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
; SI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0xd
; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x3e9a209a
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3e9a209a
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x3284fbcf
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, s8, v0
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x3284fbcf
-; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x7f800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
; SI-GISEL-NEXT: s_mov_b32 s6, -1
+; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
-; SI-GISEL-NEXT: v_fma_f32 v7, v0, v4, -v1
-; SI-GISEL-NEXT: v_fma_f32 v7, v0, v5, v7
-; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v7
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v6
+; SI-GISEL-NEXT: v_fma_f32 v6, v0, v3, -v1
+; SI-GISEL-NEXT: v_fma_f32 v6, v0, v4, v6
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v6
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v5
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[0:1]
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, s9, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, s9, v1
; SI-GISEL-NEXT: v_log_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x411a209b
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
-; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8
-; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v1
-; SI-GISEL-NEXT: v_fma_f32 v9, v1, v4, -v8
-; SI-GISEL-NEXT: v_fma_f32 v9, v1, v5, v9
+; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x411a209b
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v7
+; SI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3e9a209a, v1
+; SI-GISEL-NEXT: v_fma_f32 v8, v1, v3, -v7
+; SI-GISEL-NEXT: v_fma_f32 v8, v1, v4, v8
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2
-; SI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v9, 1.0, v3, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v9, s10, v9
-; SI-GISEL-NEXT: v_log_f32_e32 v9, v9
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, v6
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[2:3]
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v7, s[0:1]
+; SI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v8, 5, v8
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v8, s10, v8
+; SI-GISEL-NEXT: v_log_f32_e32 v8, v8
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, v5
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[2:3]
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v7, 0, v6, s[0:1]
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1]
-; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v8
-; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v9
-; SI-GISEL-NEXT: v_mul_f32_e32 v2, s11, v2
-; SI-GISEL-NEXT: v_fma_f32 v10, v9, v4, -v8
-; SI-GISEL-NEXT: v_log_f32_e32 v3, v2
-; SI-GISEL-NEXT: v_fma_f32 v10, v9, v5, v10
-; SI-GISEL-NEXT: v_add_f32_e32 v8, v8, v10
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v9|, v6
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v9, v8, s[2:3]
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
-; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v8
-; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v3
-; SI-GISEL-NEXT: v_fma_f32 v4, v3, v4, -v8
-; SI-GISEL-NEXT: v_fma_f32 v4, v3, v5, v4
-; SI-GISEL-NEXT: v_add_f32_e32 v4, v8, v4
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v6
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[0:1]
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v7
+; SI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3e9a209a, v8
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
+; SI-GISEL-NEXT: v_fma_f32 v9, v8, v3, -v7
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; SI-GISEL-NEXT: v_fma_f32 v9, v8, v4, v9
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, s11, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v7, v7, v9
+; SI-GISEL-NEXT: v_log_f32_e32 v9, v2
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v8|, v5
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v8, v7, s[2:3]
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v7
+; SI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3e9a209a, v9
+; SI-GISEL-NEXT: v_fma_f32 v3, v9, v3, -v7
+; SI-GISEL-NEXT: v_fma_f32 v3, v9, v4, v3
+; SI-GISEL-NEXT: v_add_f32_e32 v3, v7, v3
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v9|, v5
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v6, s[0:1]
; SI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4
-; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; SI-GISEL-NEXT: s_endpgm
;
@@ -1581,12 +1607,13 @@ define amdgpu_kernel void @s_log10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; VI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; VI-GISEL-NEXT: v_ldexp_f32 v0, s8, v0
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v4, v0, v1
@@ -1597,62 +1624,64 @@ define amdgpu_kernel void @s_log10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v4
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v3
; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[0:1]
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, s9, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v1, s9, v1
; VI-GISEL-NEXT: v_log_f32_e32 v1, v1
-; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x411a209b
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc
-; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v6
-; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v1
-; VI-GISEL-NEXT: v_sub_f32_e32 v7, v1, v6
-; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v7
-; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x369a84fb, v6
-; VI-GISEL-NEXT: v_add_f32_e32 v8, v9, v8
-; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3e9a2000, v7
-; VI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x411a209b
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v5
+; VI-GISEL-NEXT: v_and_b32_e32 v5, 0xfffff000, v1
+; VI-GISEL-NEXT: v_sub_f32_e32 v6, v1, v5
+; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x369a84fb, v6
+; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v5
+; VI-GISEL-NEXT: v_add_f32_e32 v7, v8, v7
; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v6
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2
; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v7, 1.0, v3, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v7, s10, v7
-; VI-GISEL-NEXT: v_log_f32_e32 v7, v7
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, v4
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[2:3]
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v5, s[0:1]
-; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v6
-; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v7
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3e9a2000, v5
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 5, v6
+; VI-GISEL-NEXT: v_ldexp_f32 v6, s10, v6
+; VI-GISEL-NEXT: v_log_f32_e32 v6, v6
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[2:3]
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, v4, s[0:1]
+; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v5
+; VI-GISEL-NEXT: v_and_b32_e32 v5, 0xfffff000, v6
+; VI-GISEL-NEXT: v_sub_f32_e32 v7, v6, v5
; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2
-; VI-GISEL-NEXT: v_sub_f32_e32 v8, v7, v6
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1]
-; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x369a84fb, v8
-; VI-GISEL-NEXT: v_mul_f32_e32 v10, 0x369a84fb, v6
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, s11, v2
-; VI-GISEL-NEXT: v_add_f32_e32 v9, v10, v9
-; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a2000, v8
-; VI-GISEL-NEXT: v_log_f32_e32 v3, v2
-; VI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9
-; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v6
-; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v8
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v7|, v4
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v7, v6, s[2:3]
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc
-; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v6
-; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v3
-; VI-GISEL-NEXT: v_sub_f32_e32 v7, v3, v6
; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v7
-; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x369a84fb, v6
+; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x369a84fb, v5
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
; VI-GISEL-NEXT: v_add_f32_e32 v8, v9, v8
; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3e9a2000, v7
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; VI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3e9a2000, v5
+; VI-GISEL-NEXT: v_ldexp_f32 v2, s11, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v7
+; VI-GISEL-NEXT: v_log_f32_e32 v7, v2
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v6|, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v5, s[2:3]
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v5
+; VI-GISEL-NEXT: v_and_b32_e32 v5, 0xfffff000, v7
+; VI-GISEL-NEXT: v_sub_f32_e32 v6, v7, v5
+; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v6
+; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x369a84fb, v5
+; VI-GISEL-NEXT: v_add_f32_e32 v8, v9, v8
; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v6
-; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1]
+; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v8
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3e9a2000, v5
+; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v7|, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v5, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1]
; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4
; VI-GISEL-NEXT: v_mov_b32_e32 v4, s4
; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5
@@ -1730,61 +1759,64 @@ define amdgpu_kernel void @s_log10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
; GFX900-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34
; GFX900-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x3e9a209a
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3e9a209a
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x3284fbcf
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, s8, v0
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x3284fbcf
; GFX900-GISEL-NEXT: v_mov_b32_e32 v6, 0x7f800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0
; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
-; GFX900-GISEL-NEXT: v_fma_f32 v7, v0, v4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v7, v0, v3, -v1
; GFX900-GISEL-NEXT: v_fma_f32 v7, v0, v5, v7
; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v7
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v6
; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1]
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[0:1]
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s9, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, s9, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x411a209b
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8
; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v1
-; GFX900-GISEL-NEXT: v_fma_f32 v9, v1, v4, -v8
+; GFX900-GISEL-NEXT: v_fma_f32 v9, v1, v3, -v8
; GFX900-GISEL-NEXT: v_fma_f32 v9, v1, v5, v9
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2
; GFX900-GISEL-NEXT: v_add_f32_e32 v8, v8, v9
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v9, 1.0, v3, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v9, s10, v9
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v9, 5, v9
+; GFX900-GISEL-NEXT: v_ldexp_f32 v9, s10, v9
; GFX900-GISEL-NEXT: v_log_f32_e32 v9, v9
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, v6
; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[2:3]
; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v7, s[0:1]
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1]
; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v8
; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v9
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s11, v2
-; GFX900-GISEL-NEXT: v_fma_f32 v10, v9, v4, -v8
-; GFX900-GISEL-NEXT: v_log_f32_e32 v3, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
+; GFX900-GISEL-NEXT: v_fma_f32 v10, v9, v3, -v8
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX900-GISEL-NEXT: v_fma_f32 v10, v9, v5, v10
+; GFX900-GISEL-NEXT: v_ldexp_f32 v2, s11, v2
; GFX900-GISEL-NEXT: v_add_f32_e32 v8, v8, v10
+; GFX900-GISEL-NEXT: v_log_f32_e32 v10, v2
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v9|, v6
; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v9, v8, s[2:3]
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc
; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v8
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v3
-; GFX900-GISEL-NEXT: v_fma_f32 v4, v3, v4, -v8
-; GFX900-GISEL-NEXT: v_fma_f32 v4, v3, v5, v4
-; GFX900-GISEL-NEXT: v_add_f32_e32 v4, v8, v4
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v6
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[0:1]
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v10
+; GFX900-GISEL-NEXT: v_fma_f32 v3, v10, v3, -v8
+; GFX900-GISEL-NEXT: v_fma_f32 v3, v10, v5, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v3, v8, v3
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v10|, v6
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, v7, s[0:1]
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v3, v3, v5
; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]
; GFX900-GISEL-NEXT: s_endpgm
;
@@ -1860,60 +1892,67 @@ define amdgpu_kernel void @s_log10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s6, 0x800000, s0
-; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s1
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s8, 0x800000, s2
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s9, 0x800000, s3
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s6, 0x800000, s0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s1
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s6
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s7
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s8
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s9
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s8
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x4f800000, s9
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s6
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s7
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x411a209b, s6
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s0, v0 :: v_dual_mul_f32 v1, s1, v1
-; GFX1100-GISEL-NEXT: v_dual_mul_f32 v2, s2, v2 :: v_dual_mul_f32 v3, s3, v3
-; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(TRANS32_DEP_3)
-; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2
-; GFX1100-GISEL-NEXT: v_log_f32_e32 v3, v3
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v3, 5, v3
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 0x411a209b, s7
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 0x411a209b, s8
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 0x411a209b, s9
-; GFX1100-GISEL-NEXT: v_dual_mul_f32 v5, 0x3e9a209a, v0 :: v_dual_mul_f32 v6, 0x3e9a209a, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v2, s2, v2
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v3, s3, v3
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v3, v3
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v7, 0x3e9a209a, v2
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s0, v0
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v3
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v1, s1, v1
+; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1100-GISEL-NEXT: v_fma_f32 v12, 0x3e9a209a, v2, -v7
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT: v_fma_f32 v13, 0x3e9a209a, v3, -v8
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v12, 0x3284fbcf, v2 :: v_dual_fmac_f32 v13, 0x3284fbcf, v3
+; GFX1100-GISEL-NEXT: v_add_f32_e32 v7, v7, v12
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_dual_mul_f32 v7, 0x3e9a209a, v2 :: v_dual_mul_f32 v8, 0x3e9a209a, v3
+; GFX1100-GISEL-NEXT: v_dual_mul_f32 v5, 0x3e9a209a, v0 :: v_dual_add_f32 v8, v8, v13
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a209a, v1
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1100-GISEL-NEXT: v_fma_f32 v10, 0x3e9a209a, v0, -v5
; GFX1100-GISEL-NEXT: v_fma_f32 v11, 0x3e9a209a, v1, -v6
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1100-GISEL-NEXT: v_fma_f32 v12, 0x3e9a209a, v2, -v7
-; GFX1100-GISEL-NEXT: v_fma_f32 v13, 0x3e9a209a, v3, -v8
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v10, 0x3284fbcf, v0 :: v_dual_fmac_f32 v11, 0x3284fbcf, v1
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v12, 0x3284fbcf, v2 :: v_dual_fmac_f32 v13, 0x3284fbcf, v3
; GFX1100-GISEL-NEXT: v_dual_add_f32 v5, v5, v10 :: v_dual_add_f32 v6, v6, v11
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT: v_dual_add_f32 v7, v7, v12 :: v_dual_add_f32 v8, v8, v13
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1|
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc_lo
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2|
-; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_sub_f32 v1, v1, v9
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_3)
+; GFX1100-GISEL-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_sub_f32 v0, v0, v4
; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc_lo
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v3|
-; GFX1100-GISEL-NEXT: v_dual_cndmask_b32 v3, v3, v8 :: v_dual_sub_f32 v2, v2, v14
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc_lo
+; GFX1100-GISEL-NEXT: v_dual_sub_f32 v1, v1, v9 :: v_dual_sub_f32 v2, v2, v14
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v3, v3, v15
; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1100-GISEL-NEXT: global_store_b128 v4, v[0:3], s[0:1]
+; GFX1100-GISEL-NEXT: global_store_b128 v5, v[0:3], s[0:1]
; GFX1100-GISEL-NEXT: s_endpgm
;
; R600-LABEL: s_log10_v4f32:
@@ -2126,10 +2165,10 @@ define float @v_log10_f32(float %in) {
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
@@ -2175,16 +2214,16 @@ define float @v_log10_f32(float %in) {
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2
; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
@@ -2224,10 +2263,10 @@ define float @v_log10_f32(float %in) {
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
@@ -2270,21 +2309,22 @@ define float @v_log10_f32(float %in) {
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -2329,10 +2369,10 @@ define float @v_log10_fabs_f32(float %in) {
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; SI-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e64 v0, |v0|, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
@@ -2378,16 +2418,16 @@ define float @v_log10_fabs_f32(float %in) {
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; VI-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, |v0|, v1
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2
; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
@@ -2427,10 +2467,10 @@ define float @v_log10_fabs_f32(float %in) {
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, |v0|, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
@@ -2475,20 +2515,22 @@ define float @v_log10_fabs_f32(float %in) {
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
-; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, |v0|, v1
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, s0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -2534,10 +2576,10 @@ define float @v_log10_fneg_fabs_f32(float %in) {
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; SI-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e64 v0, -|v0|, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
@@ -2583,16 +2625,16 @@ define float @v_log10_fneg_fabs_f32(float %in) {
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; VI-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, -|v0|, v1
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2
; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
@@ -2632,10 +2674,10 @@ define float @v_log10_fneg_fabs_f32(float %in) {
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, -|v0|, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
@@ -2680,20 +2722,22 @@ define float @v_log10_fneg_fabs_f32(float %in) {
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, -|v0|
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
-; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, -|v0|, v1
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, s0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -2740,10 +2784,10 @@ define float @v_log10_fneg_f32(float %in) {
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; SI-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e64 v0, -v0, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
@@ -2789,16 +2833,16 @@ define float @v_log10_fneg_f32(float %in) {
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; VI-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, -v0, v1
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2
; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
@@ -2838,10 +2882,10 @@ define float @v_log10_fneg_f32(float %in) {
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, -v0, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
@@ -2885,20 +2929,22 @@ define float @v_log10_fneg_f32(float %in) {
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, -v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
-; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, -v0, v1
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, s0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -3304,10 +3350,10 @@ define float @v_log10_f32_ninf(float %in) {
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
@@ -3353,16 +3399,16 @@ define float @v_log10_f32_ninf(float %in) {
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2
; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
@@ -3402,10 +3448,10 @@ define float @v_log10_f32_ninf(float %in) {
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
@@ -3448,21 +3494,22 @@ define float @v_log10_f32_ninf(float %in) {
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -4038,10 +4085,10 @@ define float @v_log10_f32_nnan(float %in) {
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
@@ -4087,16 +4134,16 @@ define float @v_log10_f32_nnan(float %in) {
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2
; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
@@ -4136,10 +4183,10 @@ define float @v_log10_f32_nnan(float %in) {
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
@@ -4182,21 +4229,22 @@ define float @v_log10_f32_nnan(float %in) {
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -4381,10 +4429,10 @@ define float @v_log10_f32_nnan_dynamic(float %in) #1 {
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
@@ -4430,16 +4478,16 @@ define float @v_log10_f32_nnan_dynamic(float %in) #1 {
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2
; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
@@ -4479,10 +4527,10 @@ define float @v_log10_f32_nnan_dynamic(float %in) #1 {
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
@@ -4525,21 +4573,22 @@ define float @v_log10_f32_nnan_dynamic(float %in) #1 {
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -4724,10 +4773,10 @@ define float @v_log10_f32_ninf_dynamic(float %in) #1 {
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
@@ -4773,16 +4822,16 @@ define float @v_log10_f32_ninf_dynamic(float %in) #1 {
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2
; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
@@ -4822,10 +4871,10 @@ define float @v_log10_f32_ninf_dynamic(float %in) #1 {
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
@@ -4868,21 +4917,22 @@ define float @v_log10_f32_ninf_dynamic(float %in) #1 {
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -4924,10 +4974,10 @@ define float @v_log10_f32_nnan_ninf(float %in) {
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
@@ -4967,16 +5017,16 @@ define float @v_log10_f32_nnan_ninf(float %in) {
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v0
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v0
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a2000, v0
; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
@@ -5010,10 +5060,10 @@ define float @v_log10_f32_nnan_ninf(float %in) {
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
@@ -5051,18 +5101,20 @@ define float @v_log10_f32_nnan_ninf(float %in) {
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_add_f32_e32 v0, v1, v2
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -5207,10 +5259,10 @@ define float @v_log10_f32_nnan_ninf_dynamic(float %in) #1 {
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
@@ -5250,16 +5302,16 @@ define float @v_log10_f32_nnan_ninf_dynamic(float %in) #1 {
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v0
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v0
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a2000, v0
; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
@@ -5293,10 +5345,10 @@ define float @v_log10_f32_nnan_ninf_dynamic(float %in) #1 {
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
@@ -5334,18 +5386,20 @@ define float @v_log10_f32_nnan_ninf_dynamic(float %in) #1 {
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_add_f32_e32 v0, v1, v2
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -5419,10 +5473,10 @@ define float @v_log10_f32_dynamic_mode(float %in) #1 {
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
@@ -5468,16 +5522,16 @@ define float @v_log10_f32_dynamic_mode(float %in) #1 {
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2
; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
@@ -5517,10 +5571,10 @@ define float @v_log10_f32_dynamic_mode(float %in) #1 {
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
@@ -5563,21 +5617,22 @@ define float @v_log10_f32_dynamic_mode(float %in) #1 {
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll
index 2c5a9f58a199e0..8b3b79b0b1bdd7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll
@@ -36,14 +36,14 @@ define amdgpu_kernel void @s_log2_f32(ptr addrspace(1) %out, float %in) {
; SI-GISEL-NEXT: s_load_dword s2, s[4:5], 0xb
; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, s2, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, s2, v0
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
@@ -74,13 +74,13 @@ define amdgpu_kernel void @s_log2_f32(ptr addrspace(1) %out, float %in) {
; VI-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, s2, v0
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; VI-GISEL-NEXT: v_ldexp_f32 v0, s2, v0
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
@@ -108,20 +108,19 @@ define amdgpu_kernel void @s_log2_f32(ptr addrspace(1) %out, float %in) {
;
; GFX900-GISEL-LABEL: s_log2_f32:
; GFX900-GISEL: ; %bb.0:
-; GFX900-GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s0, v0
-; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX900-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42000000
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, s2, v0
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX900-GISEL-NEXT: s_endpgm
;
@@ -147,20 +146,22 @@ define amdgpu_kernel void @s_log2_f32(ptr addrspace(1) %out, float %in) {
;
; GFX1100-GISEL-LABEL: s_log2_f32:
; GFX1100-GISEL: ; %bb.0:
-; GFX1100-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c
+; GFX1100-GISEL-NEXT: s_clause 0x1
+; GFX1100-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c
+; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s2, 0x800000, s0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s2
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s2
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s2
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, s0, v0
-; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s3
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s3
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s2, v0
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v1 :: v_dual_mov_b32 v1, 0
-; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1100-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: global_store_b32 v2, v0, s[0:1]
; GFX1100-GISEL-NEXT: s_endpgm
;
; R600-LABEL: s_log2_f32:
@@ -242,21 +243,22 @@ define amdgpu_kernel void @s_log2_v2f32(ptr addrspace(1) %out, <2 x float> %in)
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42000000
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 1.0, v1, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
-; SI-GISEL-NEXT: v_mul_f32_e32 v3, s6, v3
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, s7, v0
-; SI-GISEL-NEXT: v_log_f32_e32 v3, v3
-; SI-GISEL-NEXT: v_log_f32_e32 v1, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1]
-; SI-GISEL-NEXT: v_sub_f32_e32 v0, v3, v0
-; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, s6, v2
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, s7, v0
+; SI-GISEL-NEXT: v_log_f32_e32 v2, v2
+; SI-GISEL-NEXT: v_log_f32_e32 v3, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[0:1]
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v3, v1
; SI-GISEL-NEXT: s_mov_b32 s6, -1
; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
@@ -291,21 +293,22 @@ define amdgpu_kernel void @s_log2_v2f32(ptr addrspace(1) %out, <2 x float> %in)
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24
; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42000000
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0
; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v0
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 1.0, v1, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, s6, v3
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, s7, v0
-; VI-GISEL-NEXT: v_log_f32_e32 v3, v3
-; VI-GISEL-NEXT: v_log_f32_e32 v1, v0
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1]
-; VI-GISEL-NEXT: v_sub_f32_e32 v0, v3, v0
-; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; VI-GISEL-NEXT: v_ldexp_f32 v2, s6, v2
+; VI-GISEL-NEXT: v_ldexp_f32 v0, s7, v0
+; VI-GISEL-NEXT: v_log_f32_e32 v2, v2
+; VI-GISEL-NEXT: v_log_f32_e32 v3, v0
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[0:1]
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v1, v3, v1
; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
@@ -339,22 +342,23 @@ define amdgpu_kernel void @s_log2_v2f32(ptr addrspace(1) %out, <2 x float> %in)
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42000000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v0
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v0
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, 1.0, v1, vcc
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1]
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, s10, v3
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s11, v0
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v3, 5, v3
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; GFX900-GISEL-NEXT: v_ldexp_f32 v3, s10, v3
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, s11, v0
; GFX900-GISEL-NEXT: v_log_f32_e32 v3, v3
-; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v0
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1]
+; GFX900-GISEL-NEXT: v_log_f32_e32 v4, v0
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[0:1]
; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v3, v0
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v4, v1
; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
; GFX900-GISEL-NEXT: s_endpgm
;
@@ -387,23 +391,28 @@ define amdgpu_kernel void @s_log2_v2f32(ptr addrspace(1) %out, <2 x float> %in)
; GFX1100-GISEL-LABEL: s_log2_v2f32:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1100-GISEL-NEXT: v_mov_b32_e32 v4, 0
; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, s2
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s5, 0x800000, s3
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, s2
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s4
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s5
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s4
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s5
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 0x42000000, s5
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s2, v0 :: v_dual_mul_f32 v1, s3, v1
-; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s4
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v1, s3, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
-; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1100-GISEL-NEXT: v_dual_sub_f32 v1, v1, v3 :: v_dual_lshlrev_b32 v0, 5, v0
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s2, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2
+; GFX1100-GISEL-NEXT: global_store_b64 v4, v[0:1], s[0:1]
; GFX1100-GISEL-NEXT: s_endpgm
;
; R600-LABEL: s_log2_v2f32:
@@ -506,32 +515,34 @@ define amdgpu_kernel void @s_log2_v3f32(ptr addrspace(1) %out, <3 x float> %in)
;
; SI-GISEL-LABEL: s_log2_v3f32:
; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xd
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42000000
+; SI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0xd
; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42000000
+; SI-GISEL-NEXT: s_mov_b32 s6, -1
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, s0, v0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, s8, v0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v1
-; SI-GISEL-NEXT: s_mov_b32 s6, -1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v1
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 5, v3
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v3, s9, v3
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v3, v3
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, s10, v1
; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v4, s1, v4
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s2, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v2, s[0:1]
-; SI-GISEL-NEXT: v_log_f32_e32 v4, v4
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, s2, v1
-; SI-GISEL-NEXT: v_log_f32_e32 v2, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
-; SI-GISEL-NEXT: v_sub_f32_e32 v1, v4, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[0:1]
+; SI-GISEL-NEXT: v_log_f32_e32 v4, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, v2, s[0:1]
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v3, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
-; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
+; SI-GISEL-NEXT: v_sub_f32_e32 v2, v4, v2
; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-GISEL-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8
; SI-GISEL-NEXT: s_endpgm
@@ -571,32 +582,34 @@ define amdgpu_kernel void @s_log2_v3f32(ptr addrspace(1) %out, <3 x float> %in)
;
; VI-GISEL-LABEL: s_log2_v3f32:
; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; VI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34
+; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42000000
-; VI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42000000
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, s0, v0
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; VI-GISEL-NEXT: v_ldexp_f32 v0, s8, v0
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v1
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[0:1]
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 5, v3
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v3, s9, v3
+; VI-GISEL-NEXT: v_ldexp_f32 v1, s10, v1
; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, s1, v4
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s2, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v2, s[0:1]
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, s2, v1
-; VI-GISEL-NEXT: v_log_f32_e32 v4, v4
-; VI-GISEL-NEXT: v_log_f32_e32 v2, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[0:1]
-; VI-GISEL-NEXT: v_sub_f32_e32 v1, v4, v1
-; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
-; VI-GISEL-NEXT: v_mov_b32_e32 v3, s4
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, s5
+; VI-GISEL-NEXT: v_log_f32_e32 v3, v3
+; VI-GISEL-NEXT: v_log_f32_e32 v4, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, v2, s[0:1]
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v1, v3, v1
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v4, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, s3
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, s2
; VI-GISEL-NEXT: flat_store_dwordx3 v[3:4], v[0:2]
; VI-GISEL-NEXT: s_endpgm
;
@@ -637,28 +650,30 @@ define amdgpu_kernel void @s_log2_v3f32(ptr addrspace(1) %out, <3 x float> %in)
; GFX900-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
; GFX900-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x42000000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42000000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s0, v0
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, s0, v0
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v1
; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v4, s1, v4
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v4, 5, v4
+; GFX900-GISEL-NEXT: v_ldexp_f32 v4, s1, v4
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s2, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v2, s[0:1]
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s2, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, s2, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v4, v4
-; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[0:1]
+; GFX900-GISEL-NEXT: v_log_f32_e32 v5, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1]
; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v4, v1
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v5, v2
; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7]
; GFX900-GISEL-NEXT: s_endpgm
;
@@ -702,33 +717,40 @@ define amdgpu_kernel void @s_log2_v3f32(ptr addrspace(1) %out, <3 x float> %in)
;
; GFX1100-GISEL-LABEL: s_log2_v3f32:
; GFX1100-GISEL: ; %bb.0:
+; GFX1100-GISEL-NEXT: s_clause 0x1
; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
+; GFX1100-GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x24
+; GFX1100-GISEL-NEXT: v_mov_b32_e32 v6, 0
; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s6, 0x800000, s1
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s2
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s3
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s6
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s7
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s6
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s3
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s7
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x42000000, s6
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 0x42000000, s3
-; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s0, v0 :: v_dual_mul_f32 v1, s1, v1
-; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 0x42000000, s7
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v1, s1, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s0, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v2, s2, v2
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v3 :: v_dual_mov_b32 v3, 0
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v1, v1, v4
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v2, s2, v2
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v3
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v2, v2, v5
-; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1100-GISEL-NEXT: global_store_b96 v3, v[0:2], s[0:1]
+; GFX1100-GISEL-NEXT: global_store_b96 v6, v[0:2], s[4:5]
; GFX1100-GISEL-NEXT: s_endpgm
;
; R600-LABEL: s_log2_v3f32:
@@ -865,34 +887,37 @@ define amdgpu_kernel void @s_log2_v4f32(ptr addrspace(1) %out, <4 x float> %in)
; SI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0xd
; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42000000
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42000000
+; SI-GISEL-NEXT: s_mov_b32 s6, -1
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v2
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[0:1]
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, s8, v0
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, s9, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, s9, v1
; SI-GISEL-NEXT: v_log_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
-; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v5
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, v4, s[0:1]
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v3, s[0:1]
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2
-; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v5
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1]
-; SI-GISEL-NEXT: v_mul_f32_e32 v5, s10, v5
-; SI-GISEL-NEXT: v_mul_f32_e32 v2, s11, v2
-; SI-GISEL-NEXT: v_log_f32_e32 v5, v5
-; SI-GISEL-NEXT: v_log_f32_e32 v3, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1]
-; SI-GISEL-NEXT: v_sub_f32_e32 v2, v5, v2
-; SI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4
-; SI-GISEL-NEXT: s_mov_b32 s6, -1
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 5, v4
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v4, s10, v4
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, s11, v2
+; SI-GISEL-NEXT: v_log_f32_e32 v4, v4
+; SI-GISEL-NEXT: v_log_f32_e32 v5, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[0:1]
+; SI-GISEL-NEXT: v_sub_f32_e32 v2, v4, v2
+; SI-GISEL-NEXT: v_sub_f32_e32 v3, v5, v3
; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; SI-GISEL-NEXT: s_endpgm
@@ -942,33 +967,36 @@ define amdgpu_kernel void @s_log2_v4f32(ptr addrspace(1) %out, <4 x float> %in)
; VI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34
; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42000000
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42000000
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[0:1]
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
+; VI-GISEL-NEXT: v_ldexp_f32 v0, s8, v0
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, s9, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v1, s9, v1
; VI-GISEL-NEXT: v_log_f32_e32 v1, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
-; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v5
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, v4, s[0:1]
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v3, s[0:1]
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2
; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2
-; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v5
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1]
-; VI-GISEL-NEXT: v_mul_f32_e32 v5, s10, v5
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, s11, v2
-; VI-GISEL-NEXT: v_log_f32_e32 v5, v5
-; VI-GISEL-NEXT: v_log_f32_e32 v3, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1]
-; VI-GISEL-NEXT: v_sub_f32_e32 v2, v5, v2
-; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4
+; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 5, v4
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; VI-GISEL-NEXT: v_ldexp_f32 v4, s10, v4
+; VI-GISEL-NEXT: v_ldexp_f32 v2, s11, v2
+; VI-GISEL-NEXT: v_log_f32_e32 v4, v4
+; VI-GISEL-NEXT: v_log_f32_e32 v5, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[0:1]
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v4, v2
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v5, v3
; VI-GISEL-NEXT: v_mov_b32_e32 v5, s3
; VI-GISEL-NEXT: v_mov_b32_e32 v4, s2
; VI-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
@@ -1018,34 +1046,37 @@ define amdgpu_kernel void @s_log2_v4f32(ptr addrspace(1) %out, <4 x float> %in)
; GFX900-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34
; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x42000000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x42000000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v2
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[0:1]
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, s8, v0
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s9, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, s9, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc
; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v5
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, v4, s[0:1]
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, v3, s[0:1]
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2
; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v5
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1]
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v5, s10, v5
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s11, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v5, 5, v5
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2
+; GFX900-GISEL-NEXT: v_ldexp_f32 v5, s10, v5
+; GFX900-GISEL-NEXT: v_ldexp_f32 v2, s11, v2
; GFX900-GISEL-NEXT: v_log_f32_e32 v5, v5
-; GFX900-GISEL-NEXT: v_log_f32_e32 v3, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1]
+; GFX900-GISEL-NEXT: v_log_f32_e32 v6, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[0:1]
; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v5, v2
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v3, v6, v3
; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3]
; GFX900-GISEL-NEXT: s_endpgm
;
@@ -1095,39 +1126,46 @@ define amdgpu_kernel void @s_log2_v4f32(ptr addrspace(1) %out, <4 x float> %in)
;
; GFX1100-GISEL-LABEL: s_log2_v4f32:
; GFX1100-GISEL: ; %bb.0:
+; GFX1100-GISEL-NEXT: s_clause 0x1
; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
+; GFX1100-GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x24
+; GFX1100-GISEL-NEXT: v_mov_b32_e32 v8, 0
; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s6, 0x800000, s0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s1
-; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s8, 0x800000, s2
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s6, 0x800000, s0
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s9, 0x800000, s3
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s8, 0x800000, s2
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s6
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s7
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s8
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x4f800000, s9
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s7
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s6
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 0x42000000, s7
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s9
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x42000000, s6
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s0, v0 :: v_dual_mul_f32 v1, s1, v1
-; GFX1100-GISEL-NEXT: v_dual_mul_f32 v2, s2, v2 :: v_dual_mul_f32 v3, s3, v3
-; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s8
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 0x42000000, s9
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 0x42000000, s8
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v1, s1, v1
; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_dual_sub_f32 v1, v1, v5 :: v_dual_lshlrev_b32 v0, 5, v0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s0, v0
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v4 :: v_dual_lshlrev_b32 v3, 5, v3
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v3, s3, v3
; GFX1100-GISEL-NEXT: v_log_f32_e32 v3, v3
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 0x42000000, s7
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 0x42000000, s8
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 0x42000000, s9
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(TRANS32_DEP_3) | instid1(VALU_DEP_3)
-; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v4 :: v_dual_sub_f32 v1, v1, v5
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT: v_dual_sub_f32 v2, v2, v6 :: v_dual_sub_f32 v3, v3, v7
-; GFX1100-GISEL-NEXT: v_mov_b32_e32 v4, 0
-; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1100-GISEL-NEXT: global_store_b128 v4, v[0:3], s[0:1]
+; GFX1100-GISEL-NEXT: v_dual_sub_f32 v3, v3, v7 :: v_dual_lshlrev_b32 v2, 5, v2
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v2, s2, v2
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v2, v2, v6
+; GFX1100-GISEL-NEXT: global_store_b128 v8, v[0:3], s[4:5]
; GFX1100-GISEL-NEXT: s_endpgm
;
; R600-LABEL: s_log2_v4f32:
@@ -1243,19 +1281,19 @@ define float @v_log2_f32(float %in) {
; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX689-GISEL-LABEL: v_log2_f32:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log2_f32:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_log2_f32:
; VI-SDAG: ; %bb.0:
@@ -1271,6 +1309,20 @@ define float @v_log2_f32(float %in) {
; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_log2_f32:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_log2_f32:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1285,6 +1337,20 @@ define float @v_log2_f32(float %in) {
; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_log2_f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX1100-SDAG-LABEL: v_log2_f32:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1304,10 +1370,12 @@ define float @v_log2_f32(float %in) {
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
@@ -1341,19 +1409,19 @@ define float @v_log2_fabs_f32(float %in) {
; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX689-GISEL-LABEL: v_log2_fabs_f32:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log2_fabs_f32:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e64 v0, |v0|, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_log2_fabs_f32:
; VI-SDAG: ; %bb.0:
@@ -1369,6 +1437,20 @@ define float @v_log2_fabs_f32(float %in) {
; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_log2_fabs_f32:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, |v0|, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_log2_fabs_f32:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1383,6 +1465,20 @@ define float @v_log2_fabs_f32(float %in) {
; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_log2_fabs_f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, |v0|, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX1100-SDAG-LABEL: v_log2_fabs_f32:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1403,10 +1499,11 @@ define float @v_log2_fabs_f32(float %in) {
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
-; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, |v0|, v1
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
@@ -1441,19 +1538,19 @@ define float @v_log2_fneg_fabs_f32(float %in) {
; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX689-GISEL-LABEL: v_log2_fneg_fabs_f32:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log2_fneg_fabs_f32:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e64 v0, -|v0|, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_log2_fneg_fabs_f32:
; VI-SDAG: ; %bb.0:
@@ -1469,6 +1566,20 @@ define float @v_log2_fneg_fabs_f32(float %in) {
; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_log2_fneg_fabs_f32:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, -|v0|, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_log2_fneg_fabs_f32:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1483,6 +1594,20 @@ define float @v_log2_fneg_fabs_f32(float %in) {
; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_log2_fneg_fabs_f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, -|v0|, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX1100-SDAG-LABEL: v_log2_fneg_fabs_f32:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1503,10 +1628,11 @@ define float @v_log2_fneg_fabs_f32(float %in) {
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, -|v0|
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
-; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, -|v0|, v1
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
@@ -1542,19 +1668,19 @@ define float @v_log2_fneg_f32(float %in) {
; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX689-GISEL-LABEL: v_log2_fneg_f32:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log2_fneg_f32:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e64 v0, -v0, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_log2_fneg_f32:
; VI-SDAG: ; %bb.0:
@@ -1570,6 +1696,20 @@ define float @v_log2_fneg_f32(float %in) {
; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_log2_fneg_f32:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, -v0, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_log2_fneg_f32:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1584,6 +1724,20 @@ define float @v_log2_fneg_f32(float %in) {
; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_log2_fneg_f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, -v0, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX1100-SDAG-LABEL: v_log2_fneg_f32:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1604,10 +1758,11 @@ define float @v_log2_fneg_f32(float %in) {
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, -v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
-; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, -v0, v1
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
@@ -1642,19 +1797,19 @@ define float @v_log2_f32_fast(float %in) {
; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX689-GISEL-LABEL: v_log2_f32_fast:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log2_f32_fast:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_log2_f32_fast:
; VI-SDAG: ; %bb.0:
@@ -1670,6 +1825,20 @@ define float @v_log2_f32_fast(float %in) {
; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_log2_f32_fast:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_log2_f32_fast:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1684,6 +1853,20 @@ define float @v_log2_f32_fast(float %in) {
; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_log2_f32_fast:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX1100-SDAG-LABEL: v_log2_f32_fast:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1703,10 +1886,12 @@ define float @v_log2_f32_fast(float %in) {
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
@@ -1740,19 +1925,19 @@ define float @v_log2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX689-GISEL-LABEL: v_log2_f32_unsafe_math_attr:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log2_f32_unsafe_math_attr:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_log2_f32_unsafe_math_attr:
; VI-SDAG: ; %bb.0:
@@ -1768,6 +1953,20 @@ define float @v_log2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_log2_f32_unsafe_math_attr:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_log2_f32_unsafe_math_attr:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1782,6 +1981,20 @@ define float @v_log2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_log2_f32_unsafe_math_attr:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX1100-SDAG-LABEL: v_log2_f32_unsafe_math_attr:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1801,10 +2014,12 @@ define float @v_log2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
@@ -1838,19 +2053,19 @@ define float @v_log2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true"
; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX689-GISEL-LABEL: v_log2_f32_approx_fn_attr:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log2_f32_approx_fn_attr:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_log2_f32_approx_fn_attr:
; VI-SDAG: ; %bb.0:
@@ -1866,6 +2081,20 @@ define float @v_log2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true"
; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_log2_f32_approx_fn_attr:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_log2_f32_approx_fn_attr:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1880,6 +2109,20 @@ define float @v_log2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true"
; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_log2_f32_approx_fn_attr:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX1100-SDAG-LABEL: v_log2_f32_approx_fn_attr:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1899,10 +2142,12 @@ define float @v_log2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true"
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
@@ -1936,19 +2181,19 @@ define float @v_log2_f32_ninf(float %in) {
; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX689-GISEL-LABEL: v_log2_f32_ninf:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log2_f32_ninf:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_log2_f32_ninf:
; VI-SDAG: ; %bb.0:
@@ -1964,6 +2209,20 @@ define float @v_log2_f32_ninf(float %in) {
; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_log2_f32_ninf:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_log2_f32_ninf:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1978,6 +2237,20 @@ define float @v_log2_f32_ninf(float %in) {
; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_log2_f32_ninf:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX1100-SDAG-LABEL: v_log2_f32_ninf:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1997,10 +2270,12 @@ define float @v_log2_f32_ninf(float %in) {
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
@@ -2034,19 +2309,19 @@ define float @v_log2_f32_afn(float %in) {
; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX689-GISEL-LABEL: v_log2_f32_afn:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log2_f32_afn:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_log2_f32_afn:
; VI-SDAG: ; %bb.0:
@@ -2062,6 +2337,20 @@ define float @v_log2_f32_afn(float %in) {
; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_log2_f32_afn:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_log2_f32_afn:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2076,6 +2365,20 @@ define float @v_log2_f32_afn(float %in) {
; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_log2_f32_afn:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX1100-SDAG-LABEL: v_log2_f32_afn:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2095,10 +2398,12 @@ define float @v_log2_f32_afn(float %in) {
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
@@ -2158,19 +2463,19 @@ define float @v_log2_f32_afn_dynamic(float %in) #1 {
; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX689-GISEL-LABEL: v_log2_f32_afn_dynamic:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log2_f32_afn_dynamic:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_log2_f32_afn_dynamic:
; VI-SDAG: ; %bb.0:
@@ -2186,6 +2491,20 @@ define float @v_log2_f32_afn_dynamic(float %in) #1 {
; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_log2_f32_afn_dynamic:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_log2_f32_afn_dynamic:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2200,6 +2519,20 @@ define float @v_log2_f32_afn_dynamic(float %in) #1 {
; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_log2_f32_afn_dynamic:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX1100-SDAG-LABEL: v_log2_f32_afn_dynamic:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2219,10 +2552,12 @@ define float @v_log2_f32_afn_dynamic(float %in) #1 {
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
@@ -2256,19 +2591,19 @@ define float @v_fabs_log2_f32_afn(float %in) {
; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX689-GISEL-LABEL: v_fabs_log2_f32_afn:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_fabs_log2_f32_afn:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e64 v0, |v0|, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_fabs_log2_f32_afn:
; VI-SDAG: ; %bb.0:
@@ -2284,6 +2619,20 @@ define float @v_fabs_log2_f32_afn(float %in) {
; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_fabs_log2_f32_afn:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, |v0|, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_fabs_log2_f32_afn:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2298,6 +2647,20 @@ define float @v_fabs_log2_f32_afn(float %in) {
; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_fabs_log2_f32_afn:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, |v0|, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX1100-SDAG-LABEL: v_fabs_log2_f32_afn:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2318,10 +2681,11 @@ define float @v_fabs_log2_f32_afn(float %in) {
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
-; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, |v0|, v1
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
@@ -2382,19 +2746,19 @@ define float @v_log2_f32_nnan(float %in) {
; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX689-GISEL-LABEL: v_log2_f32_nnan:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log2_f32_nnan:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_log2_f32_nnan:
; VI-SDAG: ; %bb.0:
@@ -2410,6 +2774,20 @@ define float @v_log2_f32_nnan(float %in) {
; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_log2_f32_nnan:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_log2_f32_nnan:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2424,6 +2802,20 @@ define float @v_log2_f32_nnan(float %in) {
; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_log2_f32_nnan:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX1100-SDAG-LABEL: v_log2_f32_nnan:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2443,10 +2835,12 @@ define float @v_log2_f32_nnan(float %in) {
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
@@ -2506,19 +2900,19 @@ define float @v_log2_f32_nnan_dynamic(float %in) #1 {
; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX689-GISEL-LABEL: v_log2_f32_nnan_dynamic:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log2_f32_nnan_dynamic:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_log2_f32_nnan_dynamic:
; VI-SDAG: ; %bb.0:
@@ -2534,6 +2928,20 @@ define float @v_log2_f32_nnan_dynamic(float %in) #1 {
; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_log2_f32_nnan_dynamic:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_log2_f32_nnan_dynamic:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2548,6 +2956,20 @@ define float @v_log2_f32_nnan_dynamic(float %in) #1 {
; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_log2_f32_nnan_dynamic:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX1100-SDAG-LABEL: v_log2_f32_nnan_dynamic:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2567,10 +2989,12 @@ define float @v_log2_f32_nnan_dynamic(float %in) #1 {
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
@@ -2630,19 +3054,19 @@ define float @v_log2_f32_ninf_dynamic(float %in) #1 {
; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX689-GISEL-LABEL: v_log2_f32_ninf_dynamic:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log2_f32_ninf_dynamic:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_log2_f32_ninf_dynamic:
; VI-SDAG: ; %bb.0:
@@ -2658,6 +3082,20 @@ define float @v_log2_f32_ninf_dynamic(float %in) #1 {
; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_log2_f32_ninf_dynamic:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_log2_f32_ninf_dynamic:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2672,6 +3110,20 @@ define float @v_log2_f32_ninf_dynamic(float %in) #1 {
; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_log2_f32_ninf_dynamic:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX1100-SDAG-LABEL: v_log2_f32_ninf_dynamic:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2691,10 +3143,12 @@ define float @v_log2_f32_ninf_dynamic(float %in) #1 {
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
@@ -2728,19 +3182,19 @@ define float @v_log2_f32_nnan_ninf(float %in) {
; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX689-GISEL-LABEL: v_log2_f32_nnan_ninf:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log2_f32_nnan_ninf:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_log2_f32_nnan_ninf:
; VI-SDAG: ; %bb.0:
@@ -2756,6 +3210,20 @@ define float @v_log2_f32_nnan_ninf(float %in) {
; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_log2_f32_nnan_ninf:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_log2_f32_nnan_ninf:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2770,6 +3238,20 @@ define float @v_log2_f32_nnan_ninf(float %in) {
; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_log2_f32_nnan_ninf:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX1100-SDAG-LABEL: v_log2_f32_nnan_ninf:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2789,10 +3271,12 @@ define float @v_log2_f32_nnan_ninf(float %in) {
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
@@ -2852,19 +3336,19 @@ define float @v_log2_f32_nnan_ninf_dynamic(float %in) #1 {
; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX689-GISEL-LABEL: v_log2_f32_nnan_ninf_dynamic:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log2_f32_nnan_ninf_dynamic:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_log2_f32_nnan_ninf_dynamic:
; VI-SDAG: ; %bb.0:
@@ -2880,6 +3364,20 @@ define float @v_log2_f32_nnan_ninf_dynamic(float %in) #1 {
; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_log2_f32_nnan_ninf_dynamic:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_log2_f32_nnan_ninf_dynamic:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2894,6 +3392,20 @@ define float @v_log2_f32_nnan_ninf_dynamic(float %in) #1 {
; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_log2_f32_nnan_ninf_dynamic:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX1100-SDAG-LABEL: v_log2_f32_nnan_ninf_dynamic:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2913,10 +3425,12 @@ define float @v_log2_f32_nnan_ninf_dynamic(float %in) #1 {
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
@@ -2976,19 +3490,19 @@ define float @v_log2_f32_dynamic_mode(float %in) #1 {
; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX689-GISEL-LABEL: v_log2_f32_dynamic_mode:
-; GFX689-GISEL: ; %bb.0:
-; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
-; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
-; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
-; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log2_f32_dynamic_mode:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_log2_f32_dynamic_mode:
; VI-SDAG: ; %bb.0:
@@ -3004,6 +3518,20 @@ define float @v_log2_f32_dynamic_mode(float %in) #1 {
; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; VI-GISEL-LABEL: v_log2_f32_dynamic_mode:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX900-SDAG-LABEL: v_log2_f32_dynamic_mode:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3018,6 +3546,20 @@ define float @v_log2_f32_dynamic_mode(float %in) #1 {
; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
+; GFX900-GISEL-LABEL: v_log2_f32_dynamic_mode:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX1100-SDAG-LABEL: v_log2_f32_dynamic_mode:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3037,10 +3579,12 @@ define float @v_log2_f32_dynamic_mode(float %in) #1 {
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1
+; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
diff --git a/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll b/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll
index ba428df273db5f..a439f8df10a26f 100644
--- a/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll
+++ b/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll
@@ -3,32 +3,17 @@
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
define amdgpu_cs float @v_s_exp_f32(float inreg %src) {
-; GFX12-SDAG-LABEL: v_s_exp_f32:
-; GFX12-SDAG: ; %bb.0:
-; GFX12-SDAG-NEXT: s_cmp_lt_f32 s0, 0xc2fc0000
-; GFX12-SDAG-NEXT: s_cselect_b32 s1, 0x42800000, 0
-; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
-; GFX12-SDAG-NEXT: s_add_f32 s0, s0, s1
-; GFX12-SDAG-NEXT: s_cselect_b32 s1, 0xffffffc0, 0
-; GFX12-SDAG-NEXT: v_s_exp_f32 s0, s0
-; GFX12-SDAG-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
-; GFX12-SDAG-NEXT: v_ldexp_f32 v0, s0, s1
-; GFX12-SDAG-NEXT: ; return to shader part epilog
-;
-; GFX12-GISEL-LABEL: v_s_exp_f32:
-; GFX12-GISEL: ; %bb.0:
-; GFX12-GISEL-NEXT: s_cmp_lt_f32 s0, 0xc2fc0000
-; GFX12-GISEL-NEXT: s_cselect_b32 s1, 0x42800000, 0
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
-; GFX12-GISEL-NEXT: s_add_f32 s0, s0, s1
-; GFX12-GISEL-NEXT: s_cselect_b32 s1, 0x1f800000, 1.0
-; GFX12-GISEL-NEXT: v_s_exp_f32 s0, s0
-; GFX12-GISEL-NEXT: s_wait_alu 0xfffe
-; GFX12-GISEL-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
-; GFX12-GISEL-NEXT: s_mul_f32 s0, s0, s1
-; GFX12-GISEL-NEXT: s_wait_alu 0xfffe
-; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX12-GISEL-NEXT: ; return to shader part epilog
+; GFX12-LABEL: v_s_exp_f32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_cmp_lt_f32 s0, 0xc2fc0000
+; GFX12-NEXT: s_cselect_b32 s1, 0x42800000, 0
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
+; GFX12-NEXT: s_add_f32 s0, s0, s1
+; GFX12-NEXT: s_cselect_b32 s1, 0xffffffc0, 0
+; GFX12-NEXT: v_s_exp_f32 s0, s0
+; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
+; GFX12-NEXT: v_ldexp_f32 v0, s0, s1
+; GFX12-NEXT: ; return to shader part epilog
%result = call float @llvm.exp2.f32(float %src)
ret float %result
}
@@ -88,16 +73,16 @@ define amdgpu_cs float @v_s_log_f32(float inreg %src) {
; GFX12-GISEL-LABEL: v_s_log_f32:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: s_cmp_lt_f32 s0, 0x800000
-; GFX12-GISEL-NEXT: s_cselect_b32 s1, 0x4f800000, 1.0
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
-; GFX12-GISEL-NEXT: s_mul_f32 s0, s0, s1
-; GFX12-GISEL-NEXT: s_cselect_b32 s1, 0x42000000, 0
-; GFX12-GISEL-NEXT: v_s_log_f32 s0, s0
-; GFX12-GISEL-NEXT: s_wait_alu 0xfffe
-; GFX12-GISEL-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
-; GFX12-GISEL-NEXT: s_sub_f32 s0, s0, s1
+; GFX12-GISEL-NEXT: s_cselect_b32 s1, 1, 0
+; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX12-GISEL-NEXT: s_lshl_b32 s2, s1, 5
+; GFX12-GISEL-NEXT: s_cmp_lg_u32 s1, 0
+; GFX12-GISEL-NEXT: v_ldexp_f32 v0, s0, s2
+; GFX12-GISEL-NEXT: s_cselect_b32 s0, 0x42000000, 0
+; GFX12-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX12-GISEL-NEXT: s_wait_alu 0xfffe
-; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-GISEL-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
+; GFX12-GISEL-NEXT: v_subrev_f32_e32 v0, s0, v0
; GFX12-GISEL-NEXT: ; return to shader part epilog
%result = call float @llvm.log2.f32(float %src)
ret float %result
@@ -322,19 +307,18 @@ define amdgpu_cs float @srcmods_abs_f32(float inreg %src) {
;
; GFX12-GISEL-LABEL: srcmods_abs_f32:
; GFX12-GISEL: ; %bb.0:
-; GFX12-GISEL-NEXT: s_bitset0_b32 s0, 31
+; GFX12-GISEL-NEXT: s_and_b32 s1, s0, 0x7fffffff
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX12-GISEL-NEXT: s_cmp_lt_f32 s0, 0x800000
-; GFX12-GISEL-NEXT: s_cselect_b32 s1, 0x4f800000, 1.0
-; GFX12-GISEL-NEXT: s_mul_f32 s0, s0, s1
-; GFX12-GISEL-NEXT: s_cselect_b32 s1, 0x42000000, 0
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_2) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
-; GFX12-GISEL-NEXT: v_s_log_f32 s0, s0
-; GFX12-GISEL-NEXT: s_wait_alu 0xfffe
-; GFX12-GISEL-NEXT: s_sub_f32 s0, s0, s1
+; GFX12-GISEL-NEXT: s_cmp_lt_f32 s1, 0x800000
+; GFX12-GISEL-NEXT: s_cselect_b32 s1, 1, 0
+; GFX12-GISEL-NEXT: s_lshl_b32 s2, s1, 5
+; GFX12-GISEL-NEXT: s_cmp_lg_u32 s1, 0
+; GFX12-GISEL-NEXT: v_ldexp_f32 v0, |s0|, s2
+; GFX12-GISEL-NEXT: s_cselect_b32 s0, 0x42000000, 0
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
+; GFX12-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX12-GISEL-NEXT: s_wait_alu 0xfffe
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_2)
-; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-GISEL-NEXT: v_subrev_f32_e32 v0, s0, v0
; GFX12-GISEL-NEXT: ; return to shader part epilog
%abs = call float @llvm.fabs.f32(float %src)
%result = call float @llvm.log2.f32(float %abs)
@@ -362,19 +346,18 @@ define amdgpu_cs float @srcmods_neg_f32(float inreg %src) {
;
; GFX12-GISEL-LABEL: srcmods_neg_f32:
; GFX12-GISEL: ; %bb.0:
-; GFX12-GISEL-NEXT: s_xor_b32 s0, s0, 0x80000000
+; GFX12-GISEL-NEXT: s_xor_b32 s1, s0, 0x80000000
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX12-GISEL-NEXT: s_cmp_lt_f32 s0, 0x800000
-; GFX12-GISEL-NEXT: s_cselect_b32 s1, 0x4f800000, 1.0
-; GFX12-GISEL-NEXT: s_mul_f32 s0, s0, s1
-; GFX12-GISEL-NEXT: s_cselect_b32 s1, 0x42000000, 0
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_2) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
-; GFX12-GISEL-NEXT: v_s_log_f32 s0, s0
-; GFX12-GISEL-NEXT: s_wait_alu 0xfffe
-; GFX12-GISEL-NEXT: s_sub_f32 s0, s0, s1
+; GFX12-GISEL-NEXT: s_cmp_lt_f32 s1, 0x800000
+; GFX12-GISEL-NEXT: s_cselect_b32 s1, 1, 0
+; GFX12-GISEL-NEXT: s_lshl_b32 s2, s1, 5
+; GFX12-GISEL-NEXT: s_cmp_lg_u32 s1, 0
+; GFX12-GISEL-NEXT: v_ldexp_f32 v0, -s0, s2
+; GFX12-GISEL-NEXT: s_cselect_b32 s0, 0x42000000, 0
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
+; GFX12-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX12-GISEL-NEXT: s_wait_alu 0xfffe
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_2)
-; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-GISEL-NEXT: v_subrev_f32_e32 v0, s0, v0
; GFX12-GISEL-NEXT: ; return to shader part epilog
%neg = fneg float %src
%result = call float @llvm.log2.f32(float %neg)
>From e891e172e0f33504cd7a3144540dec581041dad9 Mon Sep 17 00:00:00 2001
From: vikashgu <Vikash.Gupta at amd.com>
Date: Tue, 24 Dec 2024 06:56:44 +0000
Subject: [PATCH 4/6] Addressed suggested reviewed changes.
---
llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
index 06da76d5049e5a..e3a5e20f4e537c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
@@ -451,6 +451,7 @@ bool AMDGPUCombinerHelper::matchCombineFmulWithSelectToFldexp(
std::function<void(MachineIRBuilder &)> &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_FMUL);
assert(Sel.getOpcode() == TargetOpcode::G_SELECT);
+ assert(MI.getOperand(2).getReg() == Sel.getOperand(0).getReg());
Register Dst = MI.getOperand(0).getReg();
LLT DestTy = MRI.getType(Dst);
@@ -501,14 +502,13 @@ bool AMDGPUCombinerHelper::matchCombineFmulWithSelectToFldexp(
Builder.buildConstant(IntDestTy, SelectTrueVal),
Builder.buildConstant(IntDestTy, SelectFalseVal));
+ Register XReg = MI.getOperand(1).getReg();
if (SelectTrueCst->Value.isNegative()) {
- auto NegX = Builder.buildFNeg(
- DestTy, MI.getOperand(1).getReg(),
- MRI.getVRegDef(MI.getOperand(1).getReg())->getFlags());
+ auto NegX =
+ Builder.buildFNeg(DestTy, XReg, MRI.getVRegDef(XReg)->getFlags());
Builder.buildFLdexp(Dst, NegX, NewSel, MI.getFlags());
} else {
- Builder.buildFLdexp(Dst, MI.getOperand(1).getReg(), NewSel,
- MI.getFlags());
+ Builder.buildFLdexp(Dst, XReg, NewSel, MI.getFlags());
}
};
>From e2c23aefbaafd7c0158017202f9865fdbfbeb47c Mon Sep 17 00:00:00 2001
From: vikashgu <Vikash.Gupta at amd.com>
Date: Tue, 24 Dec 2024 08:10:02 +0000
Subject: [PATCH 5/6] Added constructor with TII in AMDGPUCombinerHelper to
propagate it from Pre/Post legalizeCombiner.
---
llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp | 13 +++++++++----
llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h | 9 +++++++++
.../Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp | 2 +-
.../Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp | 2 +-
4 files changed, 20 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
index e3a5e20f4e537c..3be39e72ccffba 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
@@ -17,6 +17,13 @@
using namespace llvm;
using namespace MIPatternMatch;
+AMDGPUCombinerHelper::AMDGPUCombinerHelper(
+ GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize,
+ GISelKnownBits *KB, MachineDominatorTree *MDT, const LegalizerInfo *LI,
+ const GCNSubtarget &STI)
+ : CombinerHelper(Observer, B, IsPreLegalize, KB, MDT, LI), STI(STI),
+ TII(*STI.getInstrInfo()) {}
+
LLVM_READNONE
static bool fnegFoldsIntoMI(const MachineInstr &MI) {
switch (MI.getOpcode()) {
@@ -481,11 +488,9 @@ bool AMDGPUCombinerHelper::matchCombineFmulWithSelectToFldexp(
return false;
// For f32, only non-inline constants should be transformed.
- const SIInstrInfo *TII =
- (MI.getMF()->getSubtarget<GCNSubtarget>()).getInstrInfo();
if (ScalarDestTy == LLT::float32() &&
- TII->isInlineConstant(SelectTrueCst->Value) &&
- TII->isInlineConstant(SelectFalseCst->Value))
+ TII.isInlineConstant(SelectTrueCst->Value) &&
+ TII.isInlineConstant(SelectFalseCst->Value))
return false;
int SelectTrueVal = SelectTrueCst->Value.getExactLog2Abs();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h
index 9a0a4205ed54be..893b3f5415f8c7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h
@@ -15,13 +15,22 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUCOMBINERHELPER_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUCOMBINERHELPER_H
+#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
namespace llvm {
class AMDGPUCombinerHelper : public CombinerHelper {
+protected:
+ const GCNSubtarget &STI;
+ const SIInstrInfo &TII;
+
public:
using CombinerHelper::CombinerHelper;
+ AMDGPUCombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B,
+ bool IsPreLegalize, GISelKnownBits *KB,
+ MachineDominatorTree *MDT, const LegalizerInfo *LI,
+ const GCNSubtarget &STI);
bool matchFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo);
void applyFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
index 54d927c33fc553..888817e52e35d4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
@@ -134,7 +134,7 @@ AMDGPUPostLegalizerCombinerImpl::AMDGPUPostLegalizerCombinerImpl(
const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
: Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
TII(*STI.getInstrInfo()),
- Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI),
+ Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI, STI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
index ff8189ce31f7f7..e1564d5de415da 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
@@ -94,7 +94,7 @@ AMDGPUPreLegalizerCombinerImpl::AMDGPUPreLegalizerCombinerImpl(
const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
: Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
- Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI),
+ Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI, STI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AMDGPUGenPreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
>From d93bdedd59e2793a852bcd5292bf0418fc2b2c66 Mon Sep 17 00:00:00 2001
From: vikashgu <Vikash.Gupta at amd.com>
Date: Fri, 27 Dec 2024 08:36:49 +0000
Subject: [PATCH 6/6] Utilized isConstantOrConstantSplatFP API in matchCombine
function.
---
.../Target/AMDGPU/AMDGPUCombinerHelper.cpp | 48 +++++++++----------
1 file changed, 22 insertions(+), 26 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
index 3be39e72ccffba..8ca3c1747d20a1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
@@ -467,48 +467,44 @@ bool AMDGPUCombinerHelper::matchCombineFmulWithSelectToFldexp(
if ((ScalarDestTy == LLT::float64() || ScalarDestTy == LLT::float32() ||
ScalarDestTy == LLT::float16()) &&
(MRI.hasOneNonDBGUse(Sel.getOperand(0).getReg()))) {
- Register SelectCond = Sel.getOperand(1).getReg();
- Register SelectTrue = Sel.getOperand(2).getReg();
- Register SelectFalse = Sel.getOperand(3).getReg();
-
- const auto SelectTrueCst =
- DestTy.isVector()
- ? getFConstantSplat(SelectTrue, MRI, /*allowUndef=*/false)
- : getFConstantVRegValWithLookThrough(SelectTrue, MRI);
- if (!SelectTrueCst)
+ Register SelectCondReg = Sel.getOperand(1).getReg();
+ MachineInstr *SelectTrue = MRI.getVRegDef(Sel.getOperand(2).getReg());
+ MachineInstr *SelectFalse = MRI.getVRegDef(Sel.getOperand(3).getReg());
+
+ const auto SelectTrueVal =
+ isConstantOrConstantSplatVectorFP(*SelectTrue, MRI);
+ if (!SelectTrueVal)
return false;
- const auto SelectFalseCst =
- DestTy.isVector()
- ? getFConstantSplat(SelectFalse, MRI, /*allowUndef=*/false)
- : getFConstantVRegValWithLookThrough(SelectFalse, MRI);
- if (!SelectFalseCst)
+ const auto SelectFalseVal =
+ isConstantOrConstantSplatVectorFP(*SelectFalse, MRI);
+ if (!SelectFalseVal)
return false;
- if (SelectTrueCst->Value.isNegative() != SelectFalseCst->Value.isNegative())
+ if (SelectTrueVal->isNegative() != SelectFalseVal->isNegative())
return false;
// For f32, only non-inline constants should be transformed.
if (ScalarDestTy == LLT::float32() &&
- TII.isInlineConstant(SelectTrueCst->Value) &&
- TII.isInlineConstant(SelectFalseCst->Value))
+ TII.isInlineConstant(*SelectTrueVal) &&
+ TII.isInlineConstant(*SelectFalseVal))
return false;
- int SelectTrueVal = SelectTrueCst->Value.getExactLog2Abs();
- if (SelectTrueVal == INT_MIN)
+ int SelectTrueLog2Val = SelectTrueVal->getExactLog2Abs();
+ if (SelectTrueLog2Val == INT_MIN)
return false;
- int SelectFalseVal = SelectFalseCst->Value.getExactLog2Abs();
- if (SelectFalseVal == INT_MIN)
+ int SelectFalseLog2Val = SelectFalseVal->getExactLog2Abs();
+ if (SelectFalseLog2Val == INT_MIN)
return false;
MatchInfo = [=, &MI](MachineIRBuilder &Builder) {
LLT IntDestTy = DestTy.changeElementType(LLT::scalar(32));
- auto NewSel =
- Builder.buildSelect(IntDestTy, SelectCond,
- Builder.buildConstant(IntDestTy, SelectTrueVal),
- Builder.buildConstant(IntDestTy, SelectFalseVal));
+ auto NewSel = Builder.buildSelect(
+ IntDestTy, SelectCondReg,
+ Builder.buildConstant(IntDestTy, SelectTrueLog2Val),
+ Builder.buildConstant(IntDestTy, SelectFalseLog2Val));
Register XReg = MI.getOperand(1).getReg();
- if (SelectTrueCst->Value.isNegative()) {
+ if (SelectTrueVal->isNegative()) {
auto NegX =
Builder.buildFNeg(DestTy, XReg, MRI.getVRegDef(XReg)->getFlags());
Builder.buildFLdexp(Dst, NegX, NewSel, MI.getFlags());
More information about the llvm-commits
mailing list