[llvm] 448ac1f - AMDGPU/GlobalISel: Fix broken exp10 lowering for f16 (#170708)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 8 01:35:44 PST 2025
Author: Petar Avramovic
Date: 2025-12-08T10:35:40+01:00
New Revision: 448ac1fb00df7aadc207dc3f5e87d7dcb5b933c5
URL: https://github.com/llvm/llvm-project/commit/448ac1fb00df7aadc207dc3f5e87d7dcb5b933c5
DIFF: https://github.com/llvm/llvm-project/commit/448ac1fb00df7aadc207dc3f5e87d7dcb5b933c5.diff
LOG: AMDGPU/GlobalISel: Fix broken exp10 lowering for f16 (#170708)
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 6e3a1b6a5563f..cb1a4ee6d542e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -3728,24 +3728,39 @@ bool AMDGPULegalizerInfo::legalizeFExp2(MachineInstr &MI,
return true;
}
+static MachineInstrBuilder buildExp(MachineIRBuilder &B, const DstOp &Dst,
+ const SrcOp &Src, unsigned Flags) {
+ LLT Ty = Dst.getLLTTy(*B.getMRI());
+ // Build exp2(Src) into Dst and return the builder for the new instruction. 32-bit scalars are emitted as the amdgcn_exp2 intrinsic; every other type as a generic G_FEXP2. Flags are attached in both cases.
+ if (Ty == LLT::scalar(32)) {
+ return B.buildIntrinsic(Intrinsic::amdgcn_exp2, {Dst})
+ .addUse(Src.getReg())
+ .setMIFlags(Flags);
+ }
+ return B.buildFExp2(Dst, Src, Flags);
+}
+
+bool AMDGPULegalizerInfo::legalizeFExpUnsafeImpl(MachineIRBuilder &B,
+ Register Dst, Register X,
+ unsigned Flags,
+ bool IsExp10) const {
+ LLT Ty = B.getMRI()->getType(X); // The constant and the multiply use the source operand's type.
+ // Single-multiply lowering: one fmul by a constant followed by one exp2, with no input/result rescaling. IsExp10 selects the log2(10) multiplier instead of log2(e).
+ // exp(x) -> exp2(M_LOG2E_F * x);
+ // exp10(x) -> exp2(log2(10) * x);
+ auto Const = B.buildFConstant(Ty, IsExp10 ? 0x1.a934f0p+1f : numbers::log2e);
+ auto Mul = B.buildFMul(Ty, X, Const, Flags);
+ buildExp(B, Dst, Mul, Flags); // Writes the exp2 result directly into Dst.
+ return true; // Unconditionally reports success.
+}
+
bool AMDGPULegalizerInfo::legalizeFExpUnsafe(MachineIRBuilder &B, Register Dst,
Register X, unsigned Flags) const {
LLT Ty = B.getMRI()->getType(Dst);
LLT F32 = LLT::scalar(32);
if (Ty != F32 || !needsDenormHandlingF32(B.getMF(), X, Flags)) {
- auto Log2E = B.buildFConstant(Ty, numbers::log2e);
- auto Mul = B.buildFMul(Ty, X, Log2E, Flags);
-
- if (Ty == F32) {
- B.buildIntrinsic(Intrinsic::amdgcn_exp2, ArrayRef<Register>{Dst})
- .addUse(Mul.getReg(0))
- .setMIFlags(Flags);
- } else {
- B.buildFExp2(Dst, Mul.getReg(0), Flags);
- }
-
- return true;
+ return legalizeFExpUnsafeImpl(B, Dst, X, Flags, /*IsExp10=*/false);
}
auto Threshold = B.buildFConstant(Ty, -0x1.5d58a0p+6f);
@@ -3768,6 +3783,55 @@ bool AMDGPULegalizerInfo::legalizeFExpUnsafe(MachineIRBuilder &B, Register Dst,
return true;
}
+bool AMDGPULegalizerInfo::legalizeFExp10Unsafe(MachineIRBuilder &B,
+ Register Dst, Register X,
+ unsigned Flags) const {
+ LLT Ty = B.getMRI()->getType(Dst);
+ LLT F32 = LLT::scalar(32);
+ // Lowers exp10(X) into Dst as a product of two exp2 calls. Non-f32 types, and f32 when needsDenormHandlingF32 returns false, take the short path; otherwise the input and result are additionally rescaled. Always returns true.
+ if (Ty != F32 || !needsDenormHandlingF32(B.getMF(), X, Flags)) {
+ // exp2(x * 0x1.a92000p+1f) * exp2(x * 0x1.4f0978p-11f);
+ auto K0 = B.buildFConstant(Ty, 0x1.a92000p+1f);
+ auto K1 = B.buildFConstant(Ty, 0x1.4f0978p-11f);
+
+ auto Mul1 = B.buildFMul(Ty, X, K1, Flags);
+ auto Exp2_1 = buildExp(B, Ty, Mul1, Flags);
+ auto Mul0 = B.buildFMul(Ty, X, K0, Flags);
+ auto Exp2_0 = buildExp(B, Ty, Mul0, Flags);
+ B.buildFMul(Dst, Exp2_0, Exp2_1, Flags); // Final product is written straight into Dst.
+ return true;
+ }
+ // Scaled f32 path, described by the pseudo-code below.
+ // bool s = x < -0x1.2f7030p+5f;
+ // x += s ? 0x1.0p+5f : 0.0f;
+ // exp10 = exp2(x * 0x1.a92000p+1f) *
+ // exp2(x * 0x1.4f0978p-11f) *
+ // (s ? 0x1.9f623ep-107f : 1.0f);
+ // NeedsScaling is the 1-bit "s" condition from the pseudo-code: X ordered-less-than the threshold.
+ auto Threshold = B.buildFConstant(Ty, -0x1.2f7030p+5f);
+ auto NeedsScaling =
+ B.buildFCmp(CmpInst::FCMP_OLT, LLT::scalar(1), X, Threshold);
+ // The shifted input (X + 32) is always built; the select chooses between it and the original X.
+ auto ScaleOffset = B.buildFConstant(Ty, 0x1.0p+5f);
+ auto ScaledX = B.buildFAdd(Ty, X, ScaleOffset, Flags);
+ auto AdjustedX = B.buildSelect(Ty, NeedsScaling, ScaledX, X);
+ // K0 + K1 ~= log2(10), split into a large part and a small correction part.
+ auto K0 = B.buildFConstant(Ty, 0x1.a92000p+1f);
+ auto K1 = B.buildFConstant(Ty, 0x1.4f0978p-11f);
+ // Same two-exp2 product as the short path, but applied to AdjustedX.
+ auto Mul1 = B.buildFMul(Ty, AdjustedX, K1, Flags);
+ auto Exp2_1 = buildExp(B, Ty, Mul1, Flags);
+ auto Mul0 = B.buildFMul(Ty, AdjustedX, K0, Flags);
+ auto Exp2_0 = buildExp(B, Ty, Mul0, Flags);
+ // 0x1.9f623ep-107f ~= 10^-32, which compensates for the 0x1.0p+5f (32) added to x when scaling was needed.
+ auto MulExps = B.buildFMul(Ty, Exp2_0, Exp2_1, Flags);
+ auto ResultScaleFactor = B.buildFConstant(Ty, 0x1.9f623ep-107f);
+ auto AdjustedResult = B.buildFMul(Ty, MulExps, ResultScaleFactor, Flags);
+ // Use the rescaled product only when the input was shifted; otherwise the plain product.
+ B.buildSelect(Dst, NeedsScaling, AdjustedResult, MulExps);
+ return true;
+}
+
bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI,
MachineIRBuilder &B) const {
Register Dst = MI.getOperand(0).getReg();
@@ -3784,18 +3848,22 @@ bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI,
// v_exp_f16 (fmul x, log2e)
if (allowApproxFunc(MF, Flags)) {
// TODO: Does this really require fast?
- legalizeFExpUnsafe(B, Dst, X, Flags);
+ IsExp10 ? legalizeFExp10Unsafe(B, Dst, X, Flags)
+ : legalizeFExpUnsafe(B, Dst, X, Flags);
MI.eraseFromParent();
return true;
}
+ // Nothing in half is a denormal when promoted to f32.
+ //
// exp(f16 x) ->
// fptrunc (v_exp_f32 (fmul (fpext x), log2e))
-
- // Nothing in half is a denormal when promoted to f32.
+ //
+ // exp10(f16 x) ->
+ // fptrunc (v_exp_f32 (fmul (fpext x), log2(10)))
auto Ext = B.buildFPExt(F32, X, Flags);
Register Lowered = MRI.createGenericVirtualRegister(F32);
- legalizeFExpUnsafe(B, Lowered, Ext.getReg(0), Flags);
+ legalizeFExpUnsafeImpl(B, Lowered, Ext.getReg(0), Flags, IsExp10);
B.buildFPTrunc(Dst, Lowered, Flags);
MI.eraseFromParent();
return true;
@@ -3806,7 +3874,8 @@ bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI,
// TODO: Interpret allowApproxFunc as ignoring DAZ. This is currently copying
// library behavior. Also, is known-not-daz source sufficient?
if (allowApproxFunc(MF, Flags)) {
- legalizeFExpUnsafe(B, Dst, X, Flags);
+ IsExp10 ? legalizeFExp10Unsafe(B, Dst, X, Flags)
+ : legalizeFExpUnsafe(B, Dst, X, Flags);
MI.eraseFromParent();
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 31db548d2af88..1224ee7ac60be 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -91,8 +91,12 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
bool legalizeFlogUnsafe(MachineIRBuilder &B, Register Dst, Register Src,
bool IsLog10, unsigned Flags) const;
bool legalizeFExp2(MachineInstr &MI, MachineIRBuilder &B) const;
+ bool legalizeFExpUnsafeImpl(MachineIRBuilder &B, Register Dst, Register Src,
+ unsigned Flags, bool IsExp10) const;
bool legalizeFExpUnsafe(MachineIRBuilder &B, Register Dst, Register Src,
unsigned Flags) const;
+ bool legalizeFExp10Unsafe(MachineIRBuilder &B, Register Dst, Register Src,
+ unsigned Flags) const;
bool legalizeFExp(MachineInstr &MI, MachineIRBuilder &B) const;
bool legalizeFPow(MachineInstr &MI, MachineIRBuilder &B) const;
bool legalizeFFloor(MachineInstr &MI, MachineRegisterInfo &MRI,
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
index 8860d3276d912..574b1c0b4974c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
@@ -3182,13 +3182,16 @@ define float @v_exp10_f32_fast(float %in) {
; GCN-GISEL-LABEL: v_exp10_f32_fast:
; GCN-GISEL: ; %bb.0:
; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
-; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc217b818
+; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42000000, v0
; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; GCN-GISEL-NEXT: v_exp_f32_e32 v1, v1
; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0xa4fb11f, v0
; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -3211,13 +3214,16 @@ define float @v_exp10_f32_fast(float %in) {
; SI-GISEL-LABEL: v_exp10_f32_fast:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
-; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc217b818
+; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42000000, v0
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0xa4fb11f, v0
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -3388,13 +3394,16 @@ define float @v_exp10_f32_afn(float %in) {
; GCN-GISEL-LABEL: v_exp10_f32_afn:
; GCN-GISEL: ; %bb.0:
; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
-; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc217b818
+; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42000000, v0
; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; GCN-GISEL-NEXT: v_exp_f32_e32 v1, v1
; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0xa4fb11f, v0
; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -3417,13 +3426,16 @@ define float @v_exp10_f32_afn(float %in) {
; SI-GISEL-LABEL: v_exp10_f32_afn:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
-; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc217b818
+; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42000000, v0
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0xa4fb11f, v0
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -3441,39 +3453,25 @@ define float @v_exp10_f32_afn(float %in) {
}
define float @v_exp10_f32_afn_daz(float %in) #0 {
-; GCN-SDAG-LABEL: v_exp10_f32_afn_daz:
-; GCN-SDAG: ; %bb.0:
-; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0
-; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
-; GCN-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN-GISEL-LABEL: v_exp10_f32_afn_daz:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; SI-SDAG-LABEL: v_exp10_f32_afn_daz:
-; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
-; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
-; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_exp10_f32_afn_daz:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0
+; GCN-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; GCN-NEXT: v_exp_f32_e32 v1, v1
+; GCN-NEXT: v_exp_f32_e32 v0, v0
+; GCN-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-NEXT: s_setpc_b64 s[30:31]
;
-; SI-GISEL-LABEL: v_exp10_f32_afn_daz:
-; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-LABEL: v_exp10_f32_afn_daz:
+; SI: ; %bb.0:
+; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0
+; SI-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; SI-NEXT: v_exp_f32_e32 v1, v1
+; SI-NEXT: v_exp_f32_e32 v0, v0
+; SI-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp10_f32_afn_daz:
; R600: ; %bb.0:
@@ -3508,13 +3506,16 @@ define float @v_exp10_f32_afn_dynamic(float %in) #1 {
; GCN-GISEL-LABEL: v_exp10_f32_afn_dynamic:
; GCN-GISEL: ; %bb.0:
; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
-; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc217b818
+; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42000000, v0
; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; GCN-GISEL-NEXT: v_exp_f32_e32 v1, v1
; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0xa4fb11f, v0
; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -3537,13 +3538,16 @@ define float @v_exp10_f32_afn_dynamic(float %in) #1 {
; SI-GISEL-LABEL: v_exp10_f32_afn_dynamic:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
-; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc217b818
+; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42000000, v0
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0xa4fb11f, v0
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -3581,14 +3585,17 @@ define float @v_fabs_exp10_f32_afn(float %in) {
; GCN-GISEL-LABEL: v_fabs_exp10_f32_afn:
; GCN-GISEL: ; %bb.0:
; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
-; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc217b818
+; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42000000
; GCN-GISEL-NEXT: v_add_f32_e64 v2, |v0|, v2
; GCN-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
; GCN-GISEL-NEXT: v_cndmask_b32_e64 v0, |v0|, v2, vcc
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; GCN-GISEL-NEXT: v_exp_f32_e32 v1, v1
; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0xa4fb11f, v0
; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -3612,14 +3619,17 @@ define float @v_fabs_exp10_f32_afn(float %in) {
; SI-GISEL-LABEL: v_fabs_exp10_f32_afn:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc217b818
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42000000
; SI-GISEL-NEXT: v_add_f32_e64 v2, |v0|, v2
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, |v0|, v2, vcc
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0xa4fb11f, v0
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -4940,39 +4950,25 @@ define float @v_exp10_f32_nnan_ninf_dynamic(float %in) #1 {
}
define float @v_exp10_f32_fast_daz(float %in) #0 {
-; GCN-SDAG-LABEL: v_exp10_f32_fast_daz:
-; GCN-SDAG: ; %bb.0:
-; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0
-; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
-; GCN-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN-GISEL-LABEL: v_exp10_f32_fast_daz:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; SI-SDAG-LABEL: v_exp10_f32_fast_daz:
-; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
-; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
-; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_exp10_f32_fast_daz:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0
+; GCN-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; GCN-NEXT: v_exp_f32_e32 v1, v1
+; GCN-NEXT: v_exp_f32_e32 v0, v0
+; GCN-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-NEXT: s_setpc_b64 s[30:31]
;
-; SI-GISEL-LABEL: v_exp10_f32_fast_daz:
-; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+; SI-LABEL: v_exp10_f32_fast_daz:
+; SI: ; %bb.0:
+; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0
+; SI-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; SI-NEXT: v_exp_f32_e32 v1, v1
+; SI-NEXT: v_exp_f32_e32 v0, v0
+; SI-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp10_f32_fast_daz:
; R600: ; %bb.0:
@@ -5628,26 +5624,17 @@ define float @v_exp10_f32_from_fpext_bf16(bfloat %src) {
}
define float @v_exp10_f32_from_fpext_math_f16_fast(i16 %src0.i, i16 %src1.i) {
-; GCN-SDAG-LABEL: v_exp10_f32_from_fpext_math_f16_fast:
-; GCN-SDAG: ; %bb.0:
-; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
-; GCN-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0
-; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
-; GCN-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
-; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN-GISEL-LABEL: v_exp10_f32_from_fpext_math_f16_fast:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
-; GCN-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_exp10_f32_from_fpext_math_f16_fast:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_add_f16_e32 v0, v0, v1
+; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GCN-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0
+; GCN-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; GCN-NEXT: v_exp_f32_e32 v1, v1
+; GCN-NEXT: v_exp_f32_e32 v0, v0
+; GCN-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_f32_from_fpext_math_f16_fast:
; SI-SDAG: ; %bb.0:
@@ -5676,8 +5663,11 @@ define float @v_exp10_f32_from_fpext_math_f16_fast(i16 %src0.i, i16 %src1.i) {
; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp10_f32_from_fpext_math_f16_fast:
@@ -5877,23 +5867,14 @@ define float @v_exp10_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
; FIXME: Fold out fp16_to_fp (FP_TO_FP16) on no-f16 targets
define half @v_exp10_f16(half %in) {
-; GCN-SDAG-LABEL: v_exp10_f16:
-; GCN-SDAG: ; %bb.0:
-; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549a78, v0
-; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; GCN-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN-GISEL-LABEL: v_exp10_f16:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_exp10_f16:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GCN-NEXT: v_mul_f32_e32 v0, 0x40549a78, v0
+; GCN-NEXT: v_exp_f32_e32 v0, v0
+; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_f16:
; SI-SDAG: ; %bb.0:
@@ -5910,7 +5891,7 @@ define half @v_exp10_f16(half %in) {
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x40549a78, v0
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -5929,23 +5910,14 @@ define half @v_exp10_f16(half %in) {
}
define half @v_exp10_fabs_f16(half %in) {
-; GCN-SDAG-LABEL: v_exp10_fabs_f16:
-; GCN-SDAG: ; %bb.0:
-; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
-; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549a78, v0
-; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; GCN-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN-GISEL-LABEL: v_exp10_fabs_f16:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_cvt_f32_f16_e64 v0, |v0|
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GCN-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_exp10_fabs_f16:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_cvt_f32_f16_e64 v0, |v0|
+; GCN-NEXT: v_mul_f32_e32 v0, 0x40549a78, v0
+; GCN-NEXT: v_exp_f32_e32 v0, v0
+; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_fabs_f16:
; SI-SDAG: ; %bb.0:
@@ -5962,7 +5934,7 @@ define half @v_exp10_fabs_f16(half %in) {
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, |v0|
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x40549a78, v0
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -5995,7 +5967,7 @@ define half @v_exp10_fneg_fabs_f16(half %in) {
; GCN-GISEL: ; %bb.0:
; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x40549a78, v0
; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
; GCN-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -6015,7 +5987,7 @@ define half @v_exp10_fneg_fabs_f16(half %in) {
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x40549a78, v0
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -6049,7 +6021,7 @@ define half @v_exp10_fneg_f16(half %in) {
; GCN-GISEL: ; %bb.0:
; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0
-; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x40549a78, v0
; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
; GCN-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -6069,7 +6041,7 @@ define half @v_exp10_fneg_f16(half %in) {
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x40549a78, v0
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -6089,22 +6061,15 @@ define half @v_exp10_fneg_f16(half %in) {
}
define half @v_exp10_f16_fast(half %in) {
-; GCN-SDAG-LABEL: v_exp10_f16_fast:
-; GCN-SDAG: ; %bb.0:
-; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SDAG-NEXT: v_mul_f16_e32 v1, 0x113c, v0
-; GCN-SDAG-NEXT: v_mul_f16_e32 v0, 0x42a4, v0
-; GCN-SDAG-NEXT: v_exp_f16_e32 v1, v1
-; GCN-SDAG-NEXT: v_exp_f16_e32 v0, v0
-; GCN-SDAG-NEXT: v_mul_f16_e32 v0, v0, v1
-; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GCN-GISEL-LABEL: v_exp10_f16_fast:
-; GCN-GISEL: ; %bb.0:
-; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-GISEL-NEXT: v_mul_f16_e32 v0, 0x3dc5, v0
-; GCN-GISEL-NEXT: v_exp_f16_e32 v0, v0
-; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_exp10_f16_fast:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mul_f16_e32 v1, 0x113c, v0
+; GCN-NEXT: v_mul_f16_e32 v0, 0x42a4, v0
+; GCN-NEXT: v_exp_f16_e32 v1, v1
+; GCN-NEXT: v_exp_f16_e32 v0, v0
+; GCN-NEXT: v_mul_f16_e32 v0, v0, v1
+; GCN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_f16_fast:
; SI-SDAG: ; %bb.0:
@@ -6122,10 +6087,19 @@ define half @v_exp10_f16_fast(half %in) {
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3a278000, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x40548000, v0
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -6162,8 +6136,8 @@ define <2 x half> @v_exp10_v2f16(<2 x half> %in) {
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
; VI-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x40549a78, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x40549a78, v0
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
@@ -6171,33 +6145,19 @@ define <2 x half> @v_exp10_v2f16(<2 x half> %in) {
; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-SDAG-LABEL: v_exp10_v2f16:
-; GFX900-SDAG: ; %bb.0:
-; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v0
-; GFX900-SDAG-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v1
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549a78, v0
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0
-; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-GISEL-LABEL: v_exp10_v2f16:
-; GFX900-GISEL: ; %bb.0:
-; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
-; GFX900-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0
-; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_exp10_v2f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v0
+; GFX900-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-NEXT: v_mul_f32_e32 v1, 0x40549a78, v1
+; GFX900-NEXT: v_mul_f32_e32 v0, 0x40549a78, v0
+; GFX900-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX900-NEXT: v_pack_b32_f16 v0, v1, v0
+; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_v2f16:
; SI-SDAG: ; %bb.0:
@@ -6221,8 +6181,8 @@ define <2 x half> @v_exp10_v2f16(<2 x half> %in) {
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x40549a78, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x40549a78, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
@@ -6263,8 +6223,8 @@ define <2 x half> @v_exp10_fabs_v2f16(<2 x half> %in) {
; VI-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
; VI-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x40549a78, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x40549a78, v0
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
@@ -6292,8 +6252,8 @@ define <2 x half> @v_exp10_fabs_v2f16(<2 x half> %in) {
; GFX900-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
; GFX900-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x40549a78, v1
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x40549a78, v0
; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
@@ -6328,9 +6288,9 @@ define <2 x half> @v_exp10_fabs_v2f16(<2 x half> %in) {
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
; SI-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x40549a78, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x40549a78, v0
; SI-GISEL-NEXT: v_exp_f32_e32 v2, v0
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v1
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2
@@ -6371,8 +6331,8 @@ define <2 x half> @v_exp10_fneg_fabs_v2f16(<2 x half> %in) {
; VI-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0
; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
; VI-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x40549a78, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x40549a78, v0
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
@@ -6400,8 +6360,8 @@ define <2 x half> @v_exp10_fneg_fabs_v2f16(<2 x half> %in) {
; GFX900-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0
; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
; GFX900-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x40549a78, v1
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x40549a78, v0
; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
@@ -6440,9 +6400,9 @@ define <2 x half> @v_exp10_fneg_fabs_v2f16(<2 x half> %in) {
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
; SI-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x40549a78, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x40549a78, v0
; SI-GISEL-NEXT: v_exp_f32_e32 v2, v0
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v1
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2
@@ -6484,8 +6444,8 @@ define <2 x half> @v_exp10_fneg_v2f16(<2 x half> %in) {
; VI-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
; VI-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x40549a78, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x40549a78, v0
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
@@ -6513,8 +6473,8 @@ define <2 x half> @v_exp10_fneg_v2f16(<2 x half> %in) {
; GFX900-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
; GFX900-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x40549a78, v1
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x40549a78, v0
; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
@@ -6553,9 +6513,9 @@ define <2 x half> @v_exp10_fneg_v2f16(<2 x half> %in) {
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
; SI-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x40549a78, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x40549a78, v0
; SI-GISEL-NEXT: v_exp_f32_e32 v2, v0
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v1
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2
@@ -6597,11 +6557,18 @@ define <2 x half> @v_exp10_v2f16_fast(<2 x half> %in) {
; VI-GISEL-LABEL: v_exp10_v2f16_fast:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3dc5
-; VI-GISEL-NEXT: v_mul_f16_e32 v2, 0x3dc5, v0
-; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x113c
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42a4
+; VI-GISEL-NEXT: v_mul_f16_e32 v2, 0x113c, v0
+; VI-GISEL-NEXT: v_mul_f16_e32 v4, 0x42a4, v0
+; VI-GISEL-NEXT: v_mul_f16_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-GISEL-NEXT: v_exp_f16_e32 v2, v2
-; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; VI-GISEL-NEXT: v_exp_f16_e32 v4, v4
+; VI-GISEL-NEXT: v_exp_f16_e32 v1, v1
+; VI-GISEL-NEXT: v_exp_f16_e32 v0, v0
+; VI-GISEL-NEXT: v_mul_f16_e32 v2, v4, v2
+; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -6626,12 +6593,19 @@ define <2 x half> @v_exp10_v2f16_fast(<2 x half> %in) {
; GFX900-GISEL-LABEL: v_exp10_v2f16_fast:
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3dc5
-; GFX900-GISEL-NEXT: v_mul_f16_e32 v2, 0x3dc5, v0
-; GFX900-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x113c
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x42a4
+; GFX900-GISEL-NEXT: v_mul_f16_e32 v2, 0x113c, v0
+; GFX900-GISEL-NEXT: v_mul_f16_e32 v4, 0x42a4, v0
+; GFX900-GISEL-NEXT: v_mul_f16_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX900-GISEL-NEXT: v_exp_f16_e32 v2, v2
+; GFX900-GISEL-NEXT: v_exp_f16_e32 v4, v4
+; GFX900-GISEL-NEXT: v_exp_f16_e32 v1, v1
; GFX900-GISEL-NEXT: v_exp_f16_e32 v0, v0
-; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v2
+; GFX900-GISEL-NEXT: v_mul_f16_e32 v2, v4, v2
+; GFX900-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v2, v0
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_v2f16_fast:
@@ -6658,16 +6632,34 @@ define <2 x half> @v_exp10_v2f16_fast(<2 x half> %in) {
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1
+; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3a278000, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x40548000, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3a278000, v1
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x40548000, v1
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_exp_f32_e32 v3, v3
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v2
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v3
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp10_v2f16_fast:
@@ -6684,77 +6676,41 @@ define <2 x half> @v_exp10_v2f16_fast(<2 x half> %in) {
}
define <3 x half> @v_exp10_v3f16(<3 x half> %in) {
-; VI-SDAG-LABEL: v_exp10_v3f16:
-; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v0
-; VI-SDAG-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x40549a78, v2
-; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549a78, v0
-; VI-SDAG-NEXT: v_exp_f32_e32 v2, v2
-; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v1
-; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
-; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
-; VI-SDAG-NEXT: v_or_b32_e32 v0, v2, v0
-; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_exp10_v3f16:
-; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v0
-; VI-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; VI-GISEL-NEXT: v_exp_f32_e32 v2, v2
-; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
-; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
-; VI-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
-; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-SDAG-LABEL: v_exp10_v3f16:
-; GFX900-SDAG: ; %bb.0:
-; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v0
-; GFX900-SDAG-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x40549a78, v2
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549a78, v0
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549a78, v1
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v2, v0
-; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+; VI-LABEL: v_exp10_v3f16:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_cvt_f32_f16_e32 v2, v0
+; VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; VI-NEXT: v_mul_f32_e32 v2, 0x40549a78, v2
+; VI-NEXT: v_mul_f32_e32 v0, 0x40549a78, v0
+; VI-NEXT: v_exp_f32_e32 v2, v2
+; VI-NEXT: v_exp_f32_e32 v0, v0
+; VI-NEXT: v_mul_f32_e32 v1, 0x40549a78, v1
+; VI-NEXT: v_exp_f32_e32 v1, v1
+; VI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; VI-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; VI-NEXT: v_cvt_f16_f32_e32 v1, v1
+; VI-NEXT: v_or_b32_e32 v0, v2, v0
+; VI-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-GISEL-LABEL: v_exp10_v3f16:
-; GFX900-GISEL: ; %bb.0:
-; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v0
-; GFX900-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v2, v0
-; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_exp10_v3f16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX900-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX900-NEXT: v_mul_f32_e32 v2, 0x40549a78, v2
+; GFX900-NEXT: v_mul_f32_e32 v0, 0x40549a78, v0
+; GFX900-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-NEXT: v_mul_f32_e32 v1, 0x40549a78, v1
+; GFX900-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX900-NEXT: v_pack_b32_f16 v0, v2, v0
+; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_v3f16:
; SI-SDAG: ; %bb.0:
@@ -6785,9 +6741,9 @@ define <3 x half> @v_exp10_v3f16(<3 x half> %in) {
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x40549a78, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x40549a78, v1
+; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40549a78, v2
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
@@ -6836,14 +6792,24 @@ define <3 x half> @v_exp10_v3f16_afn(<3 x half> %in) {
; VI-GISEL-LABEL: v_exp10_v3f16_afn:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3dc5
-; VI-GISEL-NEXT: v_mul_f16_e32 v3, 0x3dc5, v0
-; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x113c
+; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42a4
+; VI-GISEL-NEXT: v_mul_f16_e32 v3, 0x113c, v0
+; VI-GISEL-NEXT: v_mul_f16_e32 v5, 0x42a4, v0
+; VI-GISEL-NEXT: v_mul_f16_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-GISEL-NEXT: v_exp_f16_e32 v3, v3
-; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-GISEL-NEXT: v_mul_f16_e32 v1, 0x3dc5, v1
+; VI-GISEL-NEXT: v_exp_f16_e32 v5, v5
+; VI-GISEL-NEXT: v_exp_f16_e32 v2, v2
+; VI-GISEL-NEXT: v_exp_f16_e32 v0, v0
+; VI-GISEL-NEXT: v_mul_f16_e32 v4, 0x113c, v1
+; VI-GISEL-NEXT: v_mul_f16_e32 v1, 0x42a4, v1
+; VI-GISEL-NEXT: v_exp_f16_e32 v4, v4
; VI-GISEL-NEXT: v_exp_f16_e32 v1, v1
+; VI-GISEL-NEXT: v_mul_f16_e32 v3, v5, v3
+; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-GISEL-NEXT: v_or_b32_e32 v0, v3, v0
+; VI-GISEL-NEXT: v_mul_f16_e32 v1, v1, v4
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_exp10_v3f16_afn:
@@ -6872,14 +6838,24 @@ define <3 x half> @v_exp10_v3f16_afn(<3 x half> %in) {
; GFX900-GISEL-LABEL: v_exp10_v3f16_afn:
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3dc5
-; GFX900-GISEL-NEXT: v_mul_f16_e32 v3, 0x3dc5, v0
-; GFX900-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x113c
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x42a4
+; GFX900-GISEL-NEXT: v_mul_f16_e32 v3, 0x113c, v0
+; GFX900-GISEL-NEXT: v_mul_f16_e32 v5, 0x42a4, v0
+; GFX900-GISEL-NEXT: v_mul_f16_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX900-GISEL-NEXT: v_exp_f16_e32 v3, v3
+; GFX900-GISEL-NEXT: v_exp_f16_e32 v5, v5
+; GFX900-GISEL-NEXT: v_exp_f16_e32 v2, v2
; GFX900-GISEL-NEXT: v_exp_f16_e32 v0, v0
-; GFX900-GISEL-NEXT: v_mul_f16_e32 v1, 0x3dc5, v1
+; GFX900-GISEL-NEXT: v_mul_f16_e32 v4, 0x113c, v1
+; GFX900-GISEL-NEXT: v_mul_f16_e32 v1, 0x42a4, v1
+; GFX900-GISEL-NEXT: v_exp_f16_e32 v4, v4
; GFX900-GISEL-NEXT: v_exp_f16_e32 v1, v1
-; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v3
+; GFX900-GISEL-NEXT: v_mul_f16_e32 v3, v5, v3
+; GFX900-GISEL-NEXT: v_mul_f16_e32 v0, v0, v2
+; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v3, v0
+; GFX900-GISEL-NEXT: v_mul_f16_e32 v1, v1, v4
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_v3f16_afn:
@@ -6926,19 +6902,46 @@ define <3 x half> @v_exp10_v3f16_afn(<3 x half> %in) {
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1
-; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3a278000, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x40548000, v0
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3a278000, v1
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x40548000, v1
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3a278000, v2
+; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x40548000, v2
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_exp_f32_e32 v3, v3
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v4, v4
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v5, v5
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
-; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v3
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v4
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v5
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; SI-GISEL-NEXT: v_exp_f32_e32 v3, v3
+; SI-GISEL-NEXT: v_exp_f32_e32 v4, v4
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v4, v4
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v3
+; SI-GISEL-NEXT: v_mul_f32_e32 v2, v2, v4
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
More information about the llvm-commits
mailing list