[llvm] AMDGPU: Fix broken exp10 lowering for f16 (PR #170582)

via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 3 16:30:35 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

<details>
<summary>Changes</summary>

This was calling the exp handling, so multiplying by the wrong
constant.

GlobalISel is still broken, but missing the fast exp10 path.
This is tracked in https://github.com/llvm/llvm-project/issues/170576

---

Patch is 38.90 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/170582.diff


2 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (+7-3) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.exp10.ll (+385-126) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 971dfdbe3e70a..5be5c66ba17dc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3055,8 +3055,11 @@ SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const {
 
   if (VT.getScalarType() == MVT::f16) {
     // v_exp_f16 (fmul x, log2e)
-    if (allowApproxFunc(DAG, Flags)) // TODO: Does this really require fast?
-      return lowerFEXPUnsafe(X, SL, DAG, Flags);
+
+    if (allowApproxFunc(DAG, Flags)) { // TODO: Does this really require fast?
+      return IsExp10 ? lowerFEXP10Unsafe(X, SL, DAG, Flags)
+                     : lowerFEXPUnsafe(X, SL, DAG, Flags);
+    }
 
     if (VT.isVector())
       return SDValue();
@@ -3066,7 +3069,8 @@ SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const {
 
     // Nothing in half is a denormal when promoted to f32.
     SDValue Ext = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, X, Flags);
-    SDValue Lowered = lowerFEXPUnsafe(Ext, SL, DAG, Flags);
+    SDValue Lowered = IsExp10 ? lowerFEXP10Unsafe(Ext, SL, DAG, Flags)
+                              : lowerFEXPUnsafe(Ext, SL, DAG, Flags);
     return DAG.getNode(ISD::FP_ROUND, SL, VT, Lowered,
                        DAG.getTargetConstant(0, SL, MVT::i32), Flags);
   }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
index 3928ec2dd76d3..8a0e02664fc6c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
@@ -5877,22 +5877,37 @@ define float @v_exp10_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
 
 ; FIXME: Fold out fp16_to_fp (FP_TO_FP16) on no-f16 targets
 define half @v_exp10_f16(half %in) {
-; GCN-LABEL: v_exp10_f16:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GCN-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-NEXT:    v_exp_f32_e32 v0, v0
-; GCN-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp10_f16:
+; GCN-SDAG:       ; %bb.0:
+; GCN-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GCN-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3a2784bc, v0
+; GCN-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
+; GCN-SDAG-NEXT:    v_exp_f32_e32 v1, v1
+; GCN-SDAG-NEXT:    v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GCN-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN-GISEL-LABEL: v_exp10_f16:
+; GCN-GISEL:       ; %bb.0:
+; GCN-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GCN-GISEL-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT:    v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GCN-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp10_f16:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3a2784bc, v0
+; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
+; SI-SDAG-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -5920,22 +5935,37 @@ define half @v_exp10_f16(half %in) {
 }
 
 define half @v_exp10_fabs_f16(half %in) {
-; GCN-LABEL: v_exp10_fabs_f16:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_cvt_f32_f16_e64 v0, |v0|
-; GCN-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-NEXT:    v_exp_f32_e32 v0, v0
-; GCN-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp10_fabs_f16:
+; GCN-SDAG:       ; %bb.0:
+; GCN-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT:    v_cvt_f32_f16_e64 v0, |v0|
+; GCN-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3a2784bc, v0
+; GCN-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
+; GCN-SDAG-NEXT:    v_exp_f32_e32 v1, v1
+; GCN-SDAG-NEXT:    v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GCN-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN-GISEL-LABEL: v_exp10_fabs_f16:
+; GCN-GISEL:       ; %bb.0:
+; GCN-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT:    v_cvt_f32_f16_e64 v0, |v0|
+; GCN-GISEL-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT:    v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GCN-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp10_fabs_f16:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e64 v0, |v0|
-; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3a2784bc, v0
+; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
+; SI-SDAG-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -5967,9 +5997,12 @@ define half @v_exp10_fneg_fabs_f16(half %in) {
 ; GCN-SDAG-LABEL: v_exp10_fneg_fabs_f16:
 ; GCN-SDAG:       ; %bb.0:
 ; GCN-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SDAG-NEXT:    v_cvt_f32_f16_e64 v0, |v0|
-; GCN-SDAG-NEXT:    v_mul_f32_e32 v0, 0xbfb8aa3b, v0
+; GCN-SDAG-NEXT:    v_cvt_f32_f16_e64 v0, -|v0|
+; GCN-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3a2784bc, v0
+; GCN-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
+; GCN-SDAG-NEXT:    v_exp_f32_e32 v1, v1
 ; GCN-SDAG-NEXT:    v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
 ; GCN-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GCN-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -5987,8 +6020,11 @@ define half @v_exp10_fneg_fabs_f16(half %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e64 v0, |v0|
-; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0xbfb8aa3b, v0
+; SI-SDAG-NEXT:    v_mul_f32_e32 v1, 0xba2784bc, v0
+; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0xc0549000, v0
+; SI-SDAG-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -6021,9 +6057,12 @@ define half @v_exp10_fneg_f16(half %in) {
 ; GCN-SDAG-LABEL: v_exp10_fneg_f16:
 ; GCN-SDAG:       ; %bb.0:
 ; GCN-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GCN-SDAG-NEXT:    v_mul_f32_e32 v0, 0xbfb8aa3b, v0
+; GCN-SDAG-NEXT:    v_cvt_f32_f16_e64 v0, -v0
+; GCN-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3a2784bc, v0
+; GCN-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
+; GCN-SDAG-NEXT:    v_exp_f32_e32 v1, v1
 ; GCN-SDAG-NEXT:    v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
 ; GCN-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GCN-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -6041,8 +6080,11 @@ define half @v_exp10_fneg_f16(half %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e64 v0, -v0
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3a2784bc, v0
+; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
+; SI-SDAG-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -6071,20 +6113,33 @@ define half @v_exp10_fneg_f16(half %in) {
 }
 
 define half @v_exp10_f16_fast(half %in) {
-; GCN-LABEL: v_exp10_f16_fast:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_f16_e32 v0, 0x3dc5, v0
-; GCN-NEXT:    v_exp_f16_e32 v0, v0
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp10_f16_fast:
+; GCN-SDAG:       ; %bb.0:
+; GCN-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT:    v_mul_f16_e32 v1, 0x113c, v0
+; GCN-SDAG-NEXT:    v_mul_f16_e32 v0, 0x42a4, v0
+; GCN-SDAG-NEXT:    v_exp_f16_e32 v1, v1
+; GCN-SDAG-NEXT:    v_exp_f16_e32 v0, v0
+; GCN-SDAG-NEXT:    v_mul_f16_e32 v0, v0, v1
+; GCN-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN-GISEL-LABEL: v_exp10_f16_fast:
+; GCN-GISEL:       ; %bb.0:
+; GCN-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT:    v_mul_f16_e32 v0, 0x3dc5, v0
+; GCN-GISEL-NEXT:    v_exp_f16_e32 v0, v0
+; GCN-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp10_f16_fast:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x3fb8a000, v0
+; SI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3a278000, v0
+; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40548000, v0
+; SI-SDAG-NEXT:    v_exp_f32_e32 v1, v1
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-GISEL-LABEL: v_exp10_f16_fast:
@@ -6117,11 +6172,17 @@ define <2 x half> @v_exp10_v2f16(<2 x half> %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    v_cvt_f32_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
 ; VI-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; VI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; VI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; VI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3a2784bc, v1
+; VI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549000, v1
+; VI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3a2784bc, v0
+; VI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
+; VI-SDAG-NEXT:    v_exp_f32_e32 v2, v2
 ; VI-SDAG-NEXT:    v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT:    v_exp_f32_e32 v3, v3
 ; VI-SDAG-NEXT:    v_exp_f32_e32 v0, v0
+; VI-SDAG-NEXT:    v_mul_f32_e32 v1, v1, v2
 ; VI-SDAG-NEXT:    v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; VI-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v3
 ; VI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
 ; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -6140,19 +6201,39 @@ define <2 x half> @v_exp10_v2f16(<2 x half> %in) {
 ; VI-GISEL-NEXT:    v_or_b32_e32 v0, v1, v0
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX900-LABEL: v_exp10_v2f16:
-; GFX900:       ; %bb.0:
-; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_cvt_f32_f16_e32 v1, v0
-; GFX900-NEXT:    v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-NEXT:    v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; GFX900-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GFX900-NEXT:    v_exp_f32_e32 v1, v1
-; GFX900-NEXT:    v_exp_f32_e32 v0, v0
-; GFX900-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; GFX900-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX900-NEXT:    v_pack_b32_f16 v0, v1, v0
-; GFX900-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_exp10_v2f16:
+; GFX900-SDAG:       ; %bb.0:
+; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v0
+; GFX900-SDAG-NEXT:    v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3a2784bc, v1
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549000, v1
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3a2784bc, v0
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
+; GFX900-SDAG-NEXT:    v_exp_f32_e32 v2, v2
+; GFX900-SDAG-NEXT:    v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT:    v_exp_f32_e32 v3, v3
+; GFX900-SDAG-NEXT:    v_exp_f32_e32 v0, v0
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, v1, v2
+; GFX900-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v3
+; GFX900-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX900-SDAG-NEXT:    v_pack_b32_f16 v0, v1, v0
+; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_exp10_v2f16:
+; GFX900-GISEL:       ; %bb.0:
+; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT:    v_cvt_f32_f16_e32 v1, v0
+; GFX900-GISEL-NEXT:    v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-GISEL-NEXT:    v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; GFX900-GISEL-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT:    v_exp_f32_e32 v1, v1
+; GFX900-GISEL-NEXT:    v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; GFX900-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX900-GISEL-NEXT:    v_pack_b32_f16 v0, v1, v0
+; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp10_v2f16:
 ; SI-SDAG:       ; %bb.0:
@@ -6161,11 +6242,17 @@ define <2 x half> @v_exp10_v2f16(<2 x half> %in) {
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; SI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3a2784bc, v0
+; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
+; SI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3a2784bc, v1
+; SI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549000, v1
+; SI-SDAG-NEXT:    v_exp_f32_e32 v2, v2
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT:    v_exp_f32_e32 v3, v3
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT:    v_mul_f32_e32 v1, v1, v3
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
@@ -6203,11 +6290,17 @@ define <2 x half> @v_exp10_fabs_v2f16(<2 x half> %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    v_cvt_f32_f16_sdwa v1, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
 ; VI-SDAG-NEXT:    v_cvt_f32_f16_e64 v0, |v0|
-; VI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; VI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; VI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3a2784bc, v1
+; VI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549000, v1
+; VI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3a2784bc, v0
+; VI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
+; VI-SDAG-NEXT:    v_exp_f32_e32 v2, v2
 ; VI-SDAG-NEXT:    v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT:    v_exp_f32_e32 v3, v3
 ; VI-SDAG-NEXT:    v_exp_f32_e32 v0, v0
+; VI-SDAG-NEXT:    v_mul_f32_e32 v1, v1, v2
 ; VI-SDAG-NEXT:    v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; VI-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v3
 ; VI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
 ; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -6232,11 +6325,17 @@ define <2 x half> @v_exp10_fabs_v2f16(<2 x half> %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    v_cvt_f32_f16_e64 v1, |v0|
 ; GFX900-SDAG-NEXT:    v_cvt_f32_f16_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3a2784bc, v1
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549000, v1
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3a2784bc, v0
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
+; GFX900-SDAG-NEXT:    v_exp_f32_e32 v2, v2
 ; GFX900-SDAG-NEXT:    v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT:    v_exp_f32_e32 v3, v3
 ; GFX900-SDAG-NEXT:    v_exp_f32_e32 v0, v0
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, v1, v2
 ; GFX900-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v3
 ; GFX900-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_pack_b32_f16 v0, v1, v0
 ; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -6263,11 +6362,17 @@ define <2 x half> @v_exp10_fabs_v2f16(<2 x half> %in) {
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e64 v0, |v0|
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e64 v1, |v1|
-; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; SI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3a2784bc, v0
+; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
+; SI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3a2784bc, v1
+; SI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549000, v1
+; SI-SDAG-NEXT:    v_exp_f32_e32 v2, v2
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT:    v_exp_f32_e32 v3, v3
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; SI-SDAG-NEXT:    v_mul_f32_e32 v1, v1, v3
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
@@ -6311,11 +6416,17 @@ define <2 x half> @v_exp10_fneg_fabs_v2f16(<2 x half> %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    v_cvt_f32_f16_sdwa v1, -|v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
 ; VI-SDAG-NEXT:    v_cvt_f32_f16_e64 v0, -|v0|
-; VI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; VI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; VI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3a2784bc, v1
+; VI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549000, v1
+; VI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3a2784bc, v0
+; VI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
+; VI-SDAG-NEXT:    v_exp_f32_e32 v2, v2
 ; VI-SDAG-NEXT:    v_exp_f32_e32 v1, v1
+; VI-SDAG-NEXT:    v_exp_f32_e32 v3, v3
 ; VI-SDAG-NEXT:    v_exp_f32_e32 v0, v0
+; VI-SDAG-NEXT:    v_mul_f32_e32 v1, v1, v2
 ; VI-SDAG-NEXT:    v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; VI-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v3
 ; VI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
 ; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -6340,11 +6451,17 @@ define <2 x half> @v_exp10_fneg_fabs_v2f16(<2 x half> %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    v_cvt_f32_f16_e64 v1, -|v0|
 ; GFX900-SDAG-NEXT:    v_cvt_f32_f16_sdwa v0, -|v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3fb8aa3b, v1
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3a2784bc, v1
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549000, v1
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3a2784bc, v0
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
+; GFX900-SDAG-NEXT:    v_exp_f32_e32 v2, v2
 ; GFX900-SDAG-NEXT:    v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT:    v_exp_f32_e32 v3, v3
 ; GFX900-SDAG-NEXT:    v_exp_f32_e32 v0, v0
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, v1, v2
 ; GFX900-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v3
 ; GFX900-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_pack_b32_f16 v0, v1, v0
 ; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -6375,11 +6492,17 @@ define <2 x half> @v_exp10_fneg_fabs_v2f16(<2 x half> %in) {
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v0
 ; SI-SDAG-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; SI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3fb8aa3b, v1
+; SI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3a2784bc, v1
+; SI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549000, v1
+; SI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3a2784bc, v0
+; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
+; SI-SDAG-NEXT:    v_exp_f32_e32 v2, v2
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v1, v1
-; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT:    v_exp_f32_e32 v3, v3
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT:    v_mul_f32_e32 v1, v1, v2
 ; SI-SDAG-NEXT:    v_...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/170582


More information about the llvm-commits mailing list