[llvm] 52113cf - AMDGPU: Fix broken exp10 lowering for f16 (#170582)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 4 01:47:38 PST 2025
Author: Matt Arsenault
Date: 2025-12-04T10:47:33+01:00
New Revision: 52113cf14f8f8163d921e2ed7be9378be2b1a857
URL: https://github.com/llvm/llvm-project/commit/52113cf14f8f8163d921e2ed7be9378be2b1a857
DIFF: https://github.com/llvm/llvm-project/commit/52113cf14f8f8163d921e2ed7be9378be2b1a857.diff
LOG: AMDGPU: Fix broken exp10 lowering for f16 (#170582)
This was calling the exp handling for exp10, and so was multiplying by
the wrong constant.
GlobalISel is still broken, and is also missing the fast exp10 path.
This is tracked in https://github.com/llvm/llvm-project/issues/170576
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 971dfdbe3e70a..5be5c66ba17dc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3055,8 +3055,11 @@ SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const {
if (VT.getScalarType() == MVT::f16) {
// v_exp_f16 (fmul x, log2e)
- if (allowApproxFunc(DAG, Flags)) // TODO: Does this really require fast?
- return lowerFEXPUnsafe(X, SL, DAG, Flags);
+
+ if (allowApproxFunc(DAG, Flags)) { // TODO: Does this really require fast?
+ return IsExp10 ? lowerFEXP10Unsafe(X, SL, DAG, Flags)
+ : lowerFEXPUnsafe(X, SL, DAG, Flags);
+ }
if (VT.isVector())
return SDValue();
@@ -3066,7 +3069,8 @@ SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const {
// Nothing in half is a denormal when promoted to f32.
SDValue Ext = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, X, Flags);
- SDValue Lowered = lowerFEXPUnsafe(Ext, SL, DAG, Flags);
+ SDValue Lowered = IsExp10 ? lowerFEXP10Unsafe(Ext, SL, DAG, Flags)
+ : lowerFEXPUnsafe(Ext, SL, DAG, Flags);
return DAG.getNode(ISD::FP_ROUND, SL, VT, Lowered,
DAG.getTargetConstant(0, SL, MVT::i32), Flags);
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
index 3928ec2dd76d3..8a0e02664fc6c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
@@ -5877,22 +5877,37 @@ define float @v_exp10_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
; FIXME: Fold out fp16_to_fp (FP_TO_FP16) on no-f16 targets
define half @v_exp10_f16(half %in) {
-; GCN-LABEL: v_exp10_f16:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-NEXT: v_exp_f32_e32 v0, v0
-; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp10_f16:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; GCN-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN-GISEL-LABEL: v_exp10_f16:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_f16:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -5920,22 +5935,37 @@ define half @v_exp10_f16(half %in) {
}
define half @v_exp10_fabs_f16(half %in) {
-; GCN-LABEL: v_exp10_fabs_f16:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_cvt_f32_f16_e64 v0, |v0|
-; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-NEXT: v_exp_f32_e32 v0, v0
-; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp10_fabs_f16:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
+; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; GCN-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN-GISEL-LABEL: v_exp10_fabs_f16:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT: v_cvt_f32_f16_e64 v0, |v0|
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_fabs_f16:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -5967,9 +5997,12 @@ define half @v_exp10_fneg_fabs_f16(half %in) {
; GCN-SDAG-LABEL: v_exp10_fneg_fabs_f16:
; GCN-SDAG: ; %bb.0:
; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
-; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0xbfb8aa3b, v0
+; GCN-SDAG-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
+; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; GCN-SDAG-NEXT: v_exp_f32_e32 v1, v1
; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; GCN-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
;
@@ -5987,8 +6020,11 @@ define half @v_exp10_fneg_fabs_f16(half %in) {
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0xbfb8aa3b, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0xba2784bc, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0xc0549000, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -6021,9 +6057,12 @@ define half @v_exp10_fneg_f16(half %in) {
; GCN-SDAG-LABEL: v_exp10_fneg_f16:
; GCN-SDAG: ; %bb.0:
; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0xbfb8aa3b, v0
+; GCN-SDAG-NEXT: v_cvt_f32_f16_e64 v0, -v0
+; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; GCN-SDAG-NEXT: v_exp_f32_e32 v1, v1
; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; GCN-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
;
@@ -6041,8 +6080,11 @@ define half @v_exp10_fneg_f16(half %in) {
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a2784bc, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -6071,20 +6113,33 @@ define half @v_exp10_fneg_f16(half %in) {
}
define half @v_exp10_f16_fast(half %in) {
-; GCN-LABEL: v_exp10_f16_fast:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f16_e32 v0, 0x3dc5, v0
-; GCN-NEXT: v_exp_f16_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp10_f16_fast:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT: v_mul_f16_e32 v1, 0x113c, v0
+; GCN-SDAG-NEXT: v_mul_f16_e32 v0, 0x42a4, v0
+; GCN-SDAG-NEXT: v_exp_f16_e32 v1, v1
+; GCN-SDAG-NEXT: v_exp_f16_e32 v0, v0
+; GCN-SDAG-NEXT: v_mul_f16_e32 v0, v0, v1
+; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN-GISEL-LABEL: v_exp10_f16_fast:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT: v_mul_f16_e32 v0, 0x3dc5, v0
+; GCN-GISEL-NEXT: v_exp_f16_e32 v0, v0
+; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_f16_fast:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3a278000, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x40548000, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp10_f16_fast:
@@ -6117,11 +6172,17 @@ define <2 x half> @v_exp10_v2f16(<2 x half> %in) {
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SDAG-NEXT: v_cvt_f32_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
-; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3a2784bc, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3a2784bc, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; VI-SDAG-NEXT: v_exp_f32_e32 v2, v2
; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_exp_f32_e32 v3, v3
; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v2
; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v3
; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -6140,19 +6201,39 @@ define <2 x half> @v_exp10_v2f16(<2 x half> %in) {
; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-LABEL: v_exp10_v2f16:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v0
-; GFX900-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; GFX900-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GFX900-NEXT: v_exp_f32_e32 v1, v1
-; GFX900-NEXT: v_exp_f32_e32 v0, v0
-; GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX900-NEXT: v_pack_b32_f16 v0, v1, v0
-; GFX900-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_exp10_v2f16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v0
+; GFX900-SDAG-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3a2784bc, v1
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549000, v1
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, 0x3a2784bc, v0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v3, v3
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v3
+; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_exp10_v2f16:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
+; GFX900-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_v2f16:
; SI-SDAG: ; %bb.0:
@@ -6161,11 +6242,17 @@ define <2 x half> @v_exp10_v2f16(<2 x half> %in) {
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3a2784bc, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3a2784bc, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549000, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v3, v3
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v3
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
@@ -6203,11 +6290,17 @@ define <2 x half> @v_exp10_fabs_v2f16(<2 x half> %in) {
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SDAG-NEXT: v_cvt_f32_f16_sdwa v1, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; VI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
-; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3a2784bc, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3a2784bc, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; VI-SDAG-NEXT: v_exp_f32_e32 v2, v2
; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_exp_f32_e32 v3, v3
; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v2
; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v3
; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -6232,11 +6325,17 @@ define <2 x half> @v_exp10_fabs_v2f16(<2 x half> %in) {
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v0|
; GFX900-SDAG-NEXT: v_cvt_f32_f16_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3a2784bc, v1
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549000, v1
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, 0x3a2784bc, v0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v3, v3
; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, v1, v2
; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v3
; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -6263,11 +6362,17 @@ define <2 x half> @v_exp10_fabs_v2f16(<2 x half> %in) {
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v1|
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3a2784bc, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3a2784bc, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549000, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v3, v3
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v3
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
@@ -6311,11 +6416,17 @@ define <2 x half> @v_exp10_fneg_fabs_v2f16(<2 x half> %in) {
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SDAG-NEXT: v_cvt_f32_f16_sdwa v1, -|v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; VI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
-; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3a2784bc, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3a2784bc, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; VI-SDAG-NEXT: v_exp_f32_e32 v2, v2
; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_exp_f32_e32 v3, v3
; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v2
; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v3
; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -6340,11 +6451,17 @@ define <2 x half> @v_exp10_fneg_fabs_v2f16(<2 x half> %in) {
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-SDAG-NEXT: v_cvt_f32_f16_e64 v1, -|v0|
; GFX900-SDAG-NEXT: v_cvt_f32_f16_sdwa v0, -|v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3a2784bc, v1
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549000, v1
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, 0x3a2784bc, v0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v3, v3
; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, v1, v2
; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v3
; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -6375,11 +6492,17 @@ define <2 x half> @v_exp10_fneg_fabs_v2f16(<2 x half> %in) {
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v0
; SI-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3a2784bc, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549000, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3a2784bc, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v3, v3
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v3
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v1
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v2
@@ -6424,11 +6547,17 @@ define <2 x half> @v_exp10_fneg_v2f16(<2 x half> %in) {
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SDAG-NEXT: v_cvt_f32_f16_sdwa v1, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; VI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, -v0
-; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3a2784bc, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3a2784bc, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; VI-SDAG-NEXT: v_exp_f32_e32 v2, v2
; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_exp_f32_e32 v3, v3
; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v2
; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v3
; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -6453,11 +6582,17 @@ define <2 x half> @v_exp10_fneg_v2f16(<2 x half> %in) {
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-SDAG-NEXT: v_cvt_f32_f16_e64 v1, -v0
; GFX900-SDAG-NEXT: v_cvt_f32_f16_sdwa v0, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3a2784bc, v1
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549000, v1
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, 0x3a2784bc, v0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v3, v3
; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, v1, v2
; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v3
; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -6488,11 +6623,17 @@ define <2 x half> @v_exp10_fneg_v2f16(<2 x half> %in) {
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v0
; SI-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3a2784bc, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549000, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3a2784bc, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v3, v3
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v3
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v1
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v2
@@ -6534,11 +6675,18 @@ define <2 x half> @v_exp10_v2f16_fast(<2 x half> %in) {
; VI-SDAG-LABEL: v_exp10_v2f16_fast:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x3dc5
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x113c
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42a4
; VI-SDAG-NEXT: v_mul_f16_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI-SDAG-NEXT: v_mul_f16_e32 v0, 0x3dc5, v0
-; VI-SDAG-NEXT: v_exp_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; VI-SDAG-NEXT: v_mul_f16_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-SDAG-NEXT: v_mul_f16_e32 v3, 0x113c, v0
+; VI-SDAG-NEXT: v_mul_f16_e32 v0, 0x42a4, v0
+; VI-SDAG-NEXT: v_exp_f16_e32 v1, v1
+; VI-SDAG-NEXT: v_exp_f16_e32 v2, v2
+; VI-SDAG-NEXT: v_exp_f16_e32 v3, v3
; VI-SDAG-NEXT: v_exp_f16_e32 v0, v0
+; VI-SDAG-NEXT: v_mul_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-SDAG-NEXT: v_mul_f16_e32 v0, v0, v3
; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
@@ -6556,11 +6704,18 @@ define <2 x half> @v_exp10_v2f16_fast(<2 x half> %in) {
; GFX900-SDAG-LABEL: v_exp10_v2f16_fast:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT: s_movk_i32 s4, 0x3dc5
-; GFX900-SDAG-NEXT: v_mul_f16_e32 v1, 0x3dc5, v0
-; GFX900-SDAG-NEXT: v_mul_f16_sdwa v0, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-SDAG-NEXT: s_movk_i32 s4, 0x113c
+; GFX900-SDAG-NEXT: s_movk_i32 s5, 0x42a4
+; GFX900-SDAG-NEXT: v_mul_f16_e32 v1, 0x113c, v0
+; GFX900-SDAG-NEXT: v_mul_f16_e32 v2, 0x42a4, v0
+; GFX900-SDAG-NEXT: v_mul_f16_sdwa v3, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-SDAG-NEXT: v_mul_f16_sdwa v0, v0, s5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX900-SDAG-NEXT: v_exp_f16_e32 v1, v1
+; GFX900-SDAG-NEXT: v_exp_f16_e32 v2, v2
+; GFX900-SDAG-NEXT: v_exp_f16_e32 v3, v3
; GFX900-SDAG-NEXT: v_exp_f16_e32 v0, v0
+; GFX900-SDAG-NEXT: v_mul_f16_e32 v1, v2, v1
+; GFX900-SDAG-NEXT: v_mul_f16_e32 v0, v0, v3
; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
@@ -6582,10 +6737,16 @@ define <2 x half> @v_exp10_v2f16_fast(<2 x half> %in) {
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3a278000, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x40548000, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3a278000, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40548000, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v3, v3
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v3
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp10_v2f16_fast:
@@ -6619,41 +6780,95 @@ define <2 x half> @v_exp10_v2f16_fast(<2 x half> %in) {
}
define <3 x half> @v_exp10_v3f16(<3 x half> %in) {
-; VI-LABEL: v_exp10_v3f16:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_cvt_f32_f16_e32 v2, v0
-; VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; VI-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
-; VI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; VI-NEXT: v_exp_f32_e32 v2, v2
-; VI-NEXT: v_exp_f32_e32 v0, v0
-; VI-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; VI-NEXT: v_exp_f32_e32 v1, v1
-; VI-NEXT: v_cvt_f16_f32_e32 v2, v2
-; VI-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; VI-NEXT: v_or_b32_e32 v0, v2, v0
-; VI-NEXT: s_setpc_b64 s[30:31]
+; VI-SDAG-LABEL: v_exp10_v3f16:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v0
+; VI-SDAG-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x40549000, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3a2784bc, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3a2784bc, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549000, v1
+; VI-SDAG-NEXT: v_exp_f32_e32 v4, v4
+; VI-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; VI-SDAG-NEXT: v_exp_f32_e32 v5, v5
+; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_exp_f32_e32 v3, v3
+; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, v2, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v5
+; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v3
+; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; VI-SDAG-NEXT: v_or_b32_e32 v0, v2, v0
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-LABEL: v_exp10_v3f16:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_cvt_f32_f16_e32 v2, v0
-; GFX900-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX900-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
-; GFX900-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GFX900-NEXT: v_exp_f32_e32 v2, v2
-; GFX900-NEXT: v_exp_f32_e32 v0, v0
-; GFX900-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; GFX900-NEXT: v_exp_f32_e32 v1, v1
-; GFX900-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX900-NEXT: v_pack_b32_f16 v0, v2, v0
-; GFX900-NEXT: s_setpc_b64 s[30:31]
+; VI-GISEL-LABEL: v_exp10_v3f16:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v0
+; VI-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; VI-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
+; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; VI-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_exp10_v3f16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX900-SDAG-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v2
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x40549000, v2
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v5, 0x3a2784bc, v0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, 0x3a2784bc, v1
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549000, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v4, v4
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v5, v5
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v3, v3
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, v2, v4
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v5
+; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, v1, v3
+; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v2, v0
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_exp10_v3f16:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GFX900-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v2, v0
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp10_v3f16:
; SI-SDAG: ; %bb.0:
@@ -6664,14 +6879,23 @@ define <3 x half> @v_exp10_v3f16(<3 x half> %in) {
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2
+; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3a2784bc, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x40549000, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v3, v3
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40549000, v1
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v3
+; SI-SDAG-NEXT: v_exp_f32_e32 v3, v4
+; SI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3a2784bc, v2
+; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x40549000, v2
+; SI-SDAG-NEXT: v_exp_f32_e32 v4, v4
; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v3
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v2, v2, v4
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
@@ -6712,13 +6936,23 @@ define <3 x half> @v_exp10_v3f16_afn(<3 x half> %in) {
; VI-SDAG-LABEL: v_exp10_v3f16_afn:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x3dc5
-; VI-SDAG-NEXT: v_mul_f16_e32 v2, 0x3dc5, v0
-; VI-SDAG-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x113c
+; VI-SDAG-NEXT: v_mov_b32_e32 v6, 0x42a4
+; VI-SDAG-NEXT: v_mul_f16_e32 v2, 0x113c, v1
+; VI-SDAG-NEXT: v_mul_f16_e32 v1, 0x42a4, v1
+; VI-SDAG-NEXT: v_mul_f16_e32 v3, 0x113c, v0
+; VI-SDAG-NEXT: v_mul_f16_e32 v4, 0x42a4, v0
+; VI-SDAG-NEXT: v_mul_f16_sdwa v5, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-SDAG-NEXT: v_mul_f16_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-SDAG-NEXT: v_exp_f16_e32 v2, v2
-; VI-SDAG-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-SDAG-NEXT: v_mul_f16_e32 v1, 0x3dc5, v1
; VI-SDAG-NEXT: v_exp_f16_e32 v1, v1
+; VI-SDAG-NEXT: v_exp_f16_e32 v3, v3
+; VI-SDAG-NEXT: v_exp_f16_e32 v4, v4
+; VI-SDAG-NEXT: v_exp_f16_e32 v5, v5
+; VI-SDAG-NEXT: v_exp_f16_e32 v0, v0
+; VI-SDAG-NEXT: v_mul_f16_e32 v1, v1, v2
+; VI-SDAG-NEXT: v_mul_f16_e32 v2, v4, v3
+; VI-SDAG-NEXT: v_mul_f16_sdwa v0, v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-SDAG-NEXT: v_or_b32_e32 v0, v2, v0
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
@@ -6738,13 +6972,23 @@ define <3 x half> @v_exp10_v3f16_afn(<3 x half> %in) {
; GFX900-SDAG-LABEL: v_exp10_v3f16_afn:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT: s_movk_i32 s4, 0x3dc5
-; GFX900-SDAG-NEXT: v_mul_f16_e32 v2, 0x3dc5, v0
-; GFX900-SDAG-NEXT: v_mul_f16_sdwa v0, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-SDAG-NEXT: s_movk_i32 s4, 0x113c
+; GFX900-SDAG-NEXT: s_movk_i32 s5, 0x42a4
+; GFX900-SDAG-NEXT: v_mul_f16_e32 v2, 0x113c, v1
+; GFX900-SDAG-NEXT: v_mul_f16_e32 v1, 0x42a4, v1
+; GFX900-SDAG-NEXT: v_mul_f16_e32 v3, 0x113c, v0
+; GFX900-SDAG-NEXT: v_mul_f16_e32 v4, 0x42a4, v0
+; GFX900-SDAG-NEXT: v_mul_f16_sdwa v5, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX900-SDAG-NEXT: v_mul_f16_sdwa v0, v0, s5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX900-SDAG-NEXT: v_exp_f16_e32 v2, v2
-; GFX900-SDAG-NEXT: v_exp_f16_e32 v0, v0
-; GFX900-SDAG-NEXT: v_mul_f16_e32 v1, 0x3dc5, v1
; GFX900-SDAG-NEXT: v_exp_f16_e32 v1, v1
+; GFX900-SDAG-NEXT: v_exp_f16_e32 v3, v3
+; GFX900-SDAG-NEXT: v_exp_f16_e32 v4, v4
+; GFX900-SDAG-NEXT: v_exp_f16_e32 v5, v5
+; GFX900-SDAG-NEXT: v_exp_f16_e32 v0, v0
+; GFX900-SDAG-NEXT: v_mul_f16_e32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_mul_f16_e32 v2, v4, v3
+; GFX900-SDAG-NEXT: v_mul_f16_e32 v0, v0, v5
; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v2, v0
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
@@ -6765,23 +7009,38 @@ define <3 x half> @v_exp10_v3f16_afn(<3 x half> %in) {
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
-; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1
-; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3a278000, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x40548000, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3a278000, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x40548000, v1
+; SI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3a278000, v2
+; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x40548000, v2
+; SI-SDAG-NEXT: v_exp_f32_e32 v3, v3
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v4, v4
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v5, v5
; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v4, v4
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v5, v5
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v5
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v3
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, v1, v4
+; SI-SDAG-NEXT: v_mul_f32_e32 v2, v2, v5
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp10_v3f16_afn:
More information about the llvm-commits
mailing list