[llvm] AMDGPU: Improve exp10 lowering for f16 (PR #170771)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 4 15:13:23 PST 2025


https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/170771

For f16, exp10 can be computed accurately by extending the input to f32,
multiplying by log2(10), and taking a single exp2. Previously this went
through the general f32 exp10 expansion, treated as an f32 case whose
input we happen to know is not denormal.

From 91cfe6bcb0517c5876104ce67aada37488a54ebd Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 4 Dec 2025 23:08:44 +0100
Subject: [PATCH] AMDGPU: Improve exp10 lowering for f16

For f16, exp10 can be computed accurately by extending the input to f32,
multiplying by log2(10), and taking a single exp2. Previously this went
through the general f32 exp10 expansion, treated as an f32 case whose
input we happen to know is not denormal.
---
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp |  39 ++--
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h   |   3 +
 llvm/test/CodeGen/AMDGPU/llvm.exp10.ll        | 209 ++++--------------
 3 files changed, 72 insertions(+), 179 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 86c70389efc8d..0b3c919051827 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2961,19 +2961,28 @@ SDValue AMDGPUTargetLowering::lowerFEXP2(SDValue Op, SelectionDAG &DAG) const {
   return DAG.getNode(ISD::FMUL, SL, VT, Exp2, ResultScale, Flags);
 }
 
+SDValue AMDGPUTargetLowering::lowerFEXPUnsafeImpl(SDValue X, const SDLoc &SL,
+                                                  SelectionDAG &DAG,
+                                                  SDNodeFlags Flags,
+                                                  bool IsExp10) const {
+  // exp(x) -> exp2(log2(e) * x); exp10(x) -> exp2(log2(10) * x).
+  // Single FMUL and exp2 in the type of X; no denormal scaling is done here.
+  EVT VT = X.getValueType();
+  SDValue Const =
+      DAG.getConstantFP(IsExp10 ? 0x1.a934f0p+1f : numbers::log2e, SL, VT);
+
+  SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, X, Const, Flags);
+  return DAG.getNode(VT == MVT::f32 ? (unsigned)AMDGPUISD::EXP
+                                    : (unsigned)ISD::FEXP2,
+                     SL, VT, Mul, Flags);
+}
+
 SDValue AMDGPUTargetLowering::lowerFEXPUnsafe(SDValue X, const SDLoc &SL,
                                               SelectionDAG &DAG,
                                               SDNodeFlags Flags) const {
   EVT VT = X.getValueType();
-  const SDValue Log2E = DAG.getConstantFP(numbers::log2e, SL, VT);
-
-  if (VT != MVT::f32 || !needsDenormHandlingF32(DAG, X, Flags)) {
-    // exp2(M_LOG2E_F * f);
-    SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, X, Log2E, Flags);
-    return DAG.getNode(VT == MVT::f32 ? (unsigned)AMDGPUISD::EXP
-                                      : (unsigned)ISD::FEXP2,
-                       SL, VT, Mul, Flags);
-  }
+  if (VT != MVT::f32 || !needsDenormHandlingF32(DAG, X, Flags))
+    return lowerFEXPUnsafeImpl(X, SL, DAG, Flags, /*IsExp10=*/false);
 
   EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 
@@ -2987,6 +2996,7 @@ SDValue AMDGPUTargetLowering::lowerFEXPUnsafe(SDValue X, const SDLoc &SL,
   SDValue AdjustedX =
       DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, ScaledX, X);
 
+  const SDValue Log2E = DAG.getConstantFP(numbers::log2e, SL, VT);
   SDValue ExpInput = DAG.getNode(ISD::FMUL, SL, VT, AdjustedX, Log2E, Flags);
 
   SDValue Exp2 = DAG.getNode(AMDGPUISD::EXP, SL, VT, ExpInput, Flags);
@@ -3005,6 +3015,7 @@ SDValue AMDGPUTargetLowering::lowerFEXP10Unsafe(SDValue X, const SDLoc &SL,
                                                 SelectionDAG &DAG,
                                                 SDNodeFlags Flags) const {
   const EVT VT = X.getValueType();
+
   const unsigned Exp2Op = VT == MVT::f32 ? static_cast<unsigned>(AMDGPUISD::EXP)
                                          : static_cast<unsigned>(ISD::FEXP2);
 
@@ -3072,13 +3083,15 @@ SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const {
     if (VT.isVector())
       return SDValue();
 
+    // Nothing in half is a denormal when promoted to f32.
+    //
     // exp(f16 x) ->
     //   fptrunc (v_exp_f32 (fmul (fpext x), log2e))
-
-    // Nothing in half is a denormal when promoted to f32.
+    //
+    // exp10(f16 x) ->
+    //   fptrunc (v_exp_f32 (fmul (fpext x), log2(10)))
     SDValue Ext = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, X, Flags);
-    SDValue Lowered = IsExp10 ? lowerFEXP10Unsafe(Ext, SL, DAG, Flags)
-                              : lowerFEXPUnsafe(Ext, SL, DAG, Flags);
+    SDValue Lowered = lowerFEXPUnsafeImpl(Ext, SL, DAG, Flags, IsExp10);
     return DAG.getNode(ISD::FP_ROUND, SL, VT, Lowered,
                        DAG.getTargetConstant(0, SL, MVT::i32), Flags);
   }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 9c0eff99981cd..10ae816f9af57 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -78,6 +78,9 @@ class AMDGPUTargetLowering : public TargetLowering {
                           bool IsLog10, SDNodeFlags Flags) const;
   SDValue lowerFEXP2(SDValue Op, SelectionDAG &DAG) const;
 
+  SDValue lowerFEXPUnsafeImpl(SDValue Op, const SDLoc &SL, SelectionDAG &DAG,
+                              SDNodeFlags Flags, bool IsExp10) const;
+
   SDValue lowerFEXPUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG,
                           SDNodeFlags Flags) const;
   SDValue lowerFEXP10Unsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG,
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
index 8a0e02664fc6c..8860d3276d912 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
@@ -5881,11 +5881,8 @@ define half @v_exp10_f16(half %in) {
 ; GCN-SDAG:       ; %bb.0:
 ; GCN-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GCN-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3a2784bc, v0
-; GCN-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
-; GCN-SDAG-NEXT:    v_exp_f32_e32 v1, v1
+; GCN-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549a78, v0
 ; GCN-SDAG-NEXT:    v_exp_f32_e32 v0, v0
-; GCN-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
 ; GCN-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GCN-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -5903,11 +5900,8 @@ define half @v_exp10_f16(half %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; SI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3a2784bc, v0
-; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
-; SI-SDAG-NEXT:    v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549a78, v0
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -5939,11 +5933,8 @@ define half @v_exp10_fabs_f16(half %in) {
 ; GCN-SDAG:       ; %bb.0:
 ; GCN-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-SDAG-NEXT:    v_cvt_f32_f16_e64 v0, |v0|
-; GCN-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3a2784bc, v0
-; GCN-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
-; GCN-SDAG-NEXT:    v_exp_f32_e32 v1, v1
+; GCN-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549a78, v0
 ; GCN-SDAG-NEXT:    v_exp_f32_e32 v0, v0
-; GCN-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
 ; GCN-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GCN-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -5961,11 +5952,8 @@ define half @v_exp10_fabs_f16(half %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e64 v0, |v0|
-; SI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3a2784bc, v0
-; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
-; SI-SDAG-NEXT:    v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549a78, v0
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -5997,12 +5985,9 @@ define half @v_exp10_fneg_fabs_f16(half %in) {
 ; GCN-SDAG-LABEL: v_exp10_fneg_fabs_f16:
 ; GCN-SDAG:       ; %bb.0:
 ; GCN-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SDAG-NEXT:    v_cvt_f32_f16_e64 v0, -|v0|
-; GCN-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3a2784bc, v0
-; GCN-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
-; GCN-SDAG-NEXT:    v_exp_f32_e32 v1, v1
+; GCN-SDAG-NEXT:    v_cvt_f32_f16_e64 v0, |v0|
+; GCN-SDAG-NEXT:    v_mul_f32_e32 v0, 0xc0549a78, v0
 ; GCN-SDAG-NEXT:    v_exp_f32_e32 v0, v0
-; GCN-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
 ; GCN-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GCN-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -6020,11 +6005,8 @@ define half @v_exp10_fneg_fabs_f16(half %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e64 v0, |v0|
-; SI-SDAG-NEXT:    v_mul_f32_e32 v1, 0xba2784bc, v0
-; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0xc0549000, v0
-; SI-SDAG-NEXT:    v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0xc0549a78, v0
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -6057,12 +6039,9 @@ define half @v_exp10_fneg_f16(half %in) {
 ; GCN-SDAG-LABEL: v_exp10_fneg_f16:
 ; GCN-SDAG:       ; %bb.0:
 ; GCN-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-SDAG-NEXT:    v_cvt_f32_f16_e64 v0, -v0
-; GCN-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3a2784bc, v0
-; GCN-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
-; GCN-SDAG-NEXT:    v_exp_f32_e32 v1, v1
+; GCN-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GCN-SDAG-NEXT:    v_mul_f32_e32 v0, 0xc0549a78, v0
 ; GCN-SDAG-NEXT:    v_exp_f32_e32 v0, v0
-; GCN-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
 ; GCN-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GCN-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -6080,11 +6059,8 @@ define half @v_exp10_fneg_f16(half %in) {
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e64 v0, -v0
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; SI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3a2784bc, v0
-; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
-; SI-SDAG-NEXT:    v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549a78, v0
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -6172,17 +6148,11 @@ define <2 x half> @v_exp10_v2f16(<2 x half> %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    v_cvt_f32_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
 ; VI-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; VI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3a2784bc, v1
-; VI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549000, v1
-; VI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3a2784bc, v0
-; VI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
-; VI-SDAG-NEXT:    v_exp_f32_e32 v2, v2
+; VI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549a78, v1
+; VI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549a78, v0
 ; VI-SDAG-NEXT:    v_exp_f32_e32 v1, v1
-; VI-SDAG-NEXT:    v_exp_f32_e32 v3, v3
 ; VI-SDAG-NEXT:    v_exp_f32_e32 v0, v0
-; VI-SDAG-NEXT:    v_mul_f32_e32 v1, v1, v2
 ; VI-SDAG-NEXT:    v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v3
 ; VI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
 ; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -6206,17 +6176,11 @@ define <2 x half> @v_exp10_v2f16(<2 x half> %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v0
 ; GFX900-SDAG-NEXT:    v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3a2784bc, v1
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549000, v1
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3a2784bc, v0
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
-; GFX900-SDAG-NEXT:    v_exp_f32_e32 v2, v2
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549a78, v1
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549a78, v0
 ; GFX900-SDAG-NEXT:    v_exp_f32_e32 v1, v1
-; GFX900-SDAG-NEXT:    v_exp_f32_e32 v3, v3
 ; GFX900-SDAG-NEXT:    v_exp_f32_e32 v0, v0
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, v1, v2
 ; GFX900-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v3
 ; GFX900-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_pack_b32_f16 v0, v1, v0
 ; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -6242,17 +6206,11 @@ define <2 x half> @v_exp10_v2f16(<2 x half> %in) {
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; SI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3a2784bc, v0
-; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
-; SI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3a2784bc, v1
-; SI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549000, v1
-; SI-SDAG-NEXT:    v_exp_f32_e32 v2, v2
+; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549a78, v0
+; SI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549a78, v1
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT:    v_exp_f32_e32 v3, v3
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v1, v1
-; SI-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-SDAG-NEXT:    v_mul_f32_e32 v1, v1, v3
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
@@ -6290,17 +6248,11 @@ define <2 x half> @v_exp10_fabs_v2f16(<2 x half> %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    v_cvt_f32_f16_sdwa v1, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
 ; VI-SDAG-NEXT:    v_cvt_f32_f16_e64 v0, |v0|
-; VI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3a2784bc, v1
-; VI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549000, v1
-; VI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3a2784bc, v0
-; VI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
-; VI-SDAG-NEXT:    v_exp_f32_e32 v2, v2
+; VI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549a78, v1
+; VI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549a78, v0
 ; VI-SDAG-NEXT:    v_exp_f32_e32 v1, v1
-; VI-SDAG-NEXT:    v_exp_f32_e32 v3, v3
 ; VI-SDAG-NEXT:    v_exp_f32_e32 v0, v0
-; VI-SDAG-NEXT:    v_mul_f32_e32 v1, v1, v2
 ; VI-SDAG-NEXT:    v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v3
 ; VI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
 ; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -6325,17 +6277,11 @@ define <2 x half> @v_exp10_fabs_v2f16(<2 x half> %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    v_cvt_f32_f16_e64 v1, |v0|
 ; GFX900-SDAG-NEXT:    v_cvt_f32_f16_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3a2784bc, v1
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549000, v1
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3a2784bc, v0
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
-; GFX900-SDAG-NEXT:    v_exp_f32_e32 v2, v2
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549a78, v1
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549a78, v0
 ; GFX900-SDAG-NEXT:    v_exp_f32_e32 v1, v1
-; GFX900-SDAG-NEXT:    v_exp_f32_e32 v3, v3
 ; GFX900-SDAG-NEXT:    v_exp_f32_e32 v0, v0
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, v1, v2
 ; GFX900-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v3
 ; GFX900-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_pack_b32_f16 v0, v1, v0
 ; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -6362,17 +6308,11 @@ define <2 x half> @v_exp10_fabs_v2f16(<2 x half> %in) {
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e64 v0, |v0|
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e64 v1, |v1|
-; SI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3a2784bc, v0
-; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
-; SI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3a2784bc, v1
-; SI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549000, v1
-; SI-SDAG-NEXT:    v_exp_f32_e32 v2, v2
+; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549a78, v0
+; SI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549a78, v1
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT:    v_exp_f32_e32 v3, v3
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v1, v1
-; SI-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-SDAG-NEXT:    v_mul_f32_e32 v1, v1, v3
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
@@ -6416,17 +6356,11 @@ define <2 x half> @v_exp10_fneg_fabs_v2f16(<2 x half> %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    v_cvt_f32_f16_sdwa v1, -|v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
 ; VI-SDAG-NEXT:    v_cvt_f32_f16_e64 v0, -|v0|
-; VI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3a2784bc, v1
-; VI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549000, v1
-; VI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3a2784bc, v0
-; VI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
-; VI-SDAG-NEXT:    v_exp_f32_e32 v2, v2
+; VI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549a78, v1
+; VI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549a78, v0
 ; VI-SDAG-NEXT:    v_exp_f32_e32 v1, v1
-; VI-SDAG-NEXT:    v_exp_f32_e32 v3, v3
 ; VI-SDAG-NEXT:    v_exp_f32_e32 v0, v0
-; VI-SDAG-NEXT:    v_mul_f32_e32 v1, v1, v2
 ; VI-SDAG-NEXT:    v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v3
 ; VI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
 ; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -6451,17 +6385,11 @@ define <2 x half> @v_exp10_fneg_fabs_v2f16(<2 x half> %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    v_cvt_f32_f16_e64 v1, -|v0|
 ; GFX900-SDAG-NEXT:    v_cvt_f32_f16_sdwa v0, -|v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3a2784bc, v1
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549000, v1
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3a2784bc, v0
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
-; GFX900-SDAG-NEXT:    v_exp_f32_e32 v2, v2
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549a78, v1
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549a78, v0
 ; GFX900-SDAG-NEXT:    v_exp_f32_e32 v1, v1
-; GFX900-SDAG-NEXT:    v_exp_f32_e32 v3, v3
 ; GFX900-SDAG-NEXT:    v_exp_f32_e32 v0, v0
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, v1, v2
 ; GFX900-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v3
 ; GFX900-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_pack_b32_f16 v0, v1, v0
 ; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -6492,17 +6420,11 @@ define <2 x half> @v_exp10_fneg_fabs_v2f16(<2 x half> %in) {
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v0
 ; SI-SDAG-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; SI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3a2784bc, v1
-; SI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549000, v1
-; SI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3a2784bc, v0
-; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
-; SI-SDAG-NEXT:    v_exp_f32_e32 v2, v2
+; SI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549a78, v1
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v1, v1
-; SI-SDAG-NEXT:    v_exp_f32_e32 v3, v3
+; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549a78, v0
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT:    v_mul_f32_e32 v1, v1, v2
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; SI-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v3
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v2, v0
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v1
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v2
@@ -6547,17 +6469,11 @@ define <2 x half> @v_exp10_fneg_v2f16(<2 x half> %in) {
 ; VI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-SDAG-NEXT:    v_cvt_f32_f16_sdwa v1, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
 ; VI-SDAG-NEXT:    v_cvt_f32_f16_e64 v0, -v0
-; VI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3a2784bc, v1
-; VI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549000, v1
-; VI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3a2784bc, v0
-; VI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
-; VI-SDAG-NEXT:    v_exp_f32_e32 v2, v2
+; VI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549a78, v1
+; VI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549a78, v0
 ; VI-SDAG-NEXT:    v_exp_f32_e32 v1, v1
-; VI-SDAG-NEXT:    v_exp_f32_e32 v3, v3
 ; VI-SDAG-NEXT:    v_exp_f32_e32 v0, v0
-; VI-SDAG-NEXT:    v_mul_f32_e32 v1, v1, v2
 ; VI-SDAG-NEXT:    v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v3
 ; VI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; VI-SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
 ; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -6582,17 +6498,11 @@ define <2 x half> @v_exp10_fneg_v2f16(<2 x half> %in) {
 ; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-SDAG-NEXT:    v_cvt_f32_f16_e64 v1, -v0
 ; GFX900-SDAG-NEXT:    v_cvt_f32_f16_sdwa v0, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3a2784bc, v1
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549000, v1
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3a2784bc, v0
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
-; GFX900-SDAG-NEXT:    v_exp_f32_e32 v2, v2
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549a78, v1
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549a78, v0
 ; GFX900-SDAG-NEXT:    v_exp_f32_e32 v1, v1
-; GFX900-SDAG-NEXT:    v_exp_f32_e32 v3, v3
 ; GFX900-SDAG-NEXT:    v_exp_f32_e32 v0, v0
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, v1, v2
 ; GFX900-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v3
 ; GFX900-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX900-SDAG-NEXT:    v_pack_b32_f16 v0, v1, v0
 ; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -6623,17 +6533,11 @@ define <2 x half> @v_exp10_fneg_v2f16(<2 x half> %in) {
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v0
 ; SI-SDAG-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; SI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x3a2784bc, v1
-; SI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549000, v1
-; SI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3a2784bc, v0
-; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
-; SI-SDAG-NEXT:    v_exp_f32_e32 v2, v2
+; SI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549a78, v1
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v1, v1
-; SI-SDAG-NEXT:    v_exp_f32_e32 v3, v3
+; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549a78, v0
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT:    v_mul_f32_e32 v1, v1, v2
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; SI-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v3
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v2, v0
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v1
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v2
@@ -6786,23 +6690,14 @@ define <3 x half> @v_exp10_v3f16(<3 x half> %in) {
 ; VI-SDAG-NEXT:    v_cvt_f32_f16_e32 v2, v0
 ; VI-SDAG-NEXT:    v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
 ; VI-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3a2784bc, v2
-; VI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x40549000, v2
-; VI-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3a2784bc, v0
-; VI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
-; VI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3a2784bc, v1
-; VI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549000, v1
-; VI-SDAG-NEXT:    v_exp_f32_e32 v4, v4
+; VI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x40549a78, v2
+; VI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549a78, v0
 ; VI-SDAG-NEXT:    v_exp_f32_e32 v2, v2
-; VI-SDAG-NEXT:    v_exp_f32_e32 v5, v5
 ; VI-SDAG-NEXT:    v_exp_f32_e32 v0, v0
-; VI-SDAG-NEXT:    v_exp_f32_e32 v3, v3
+; VI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549a78, v1
 ; VI-SDAG-NEXT:    v_exp_f32_e32 v1, v1
-; VI-SDAG-NEXT:    v_mul_f32_e32 v2, v2, v4
-; VI-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v5
 ; VI-SDAG-NEXT:    v_cvt_f16_f32_e32 v2, v2
 ; VI-SDAG-NEXT:    v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-SDAG-NEXT:    v_mul_f32_e32 v1, v1, v3
 ; VI-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; VI-SDAG-NEXT:    v_or_b32_e32 v0, v2, v0
 ; VI-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -6831,23 +6726,14 @@ define <3 x half> @v_exp10_v3f16(<3 x half> %in) {
 ; GFX900-SDAG-NEXT:    v_cvt_f32_f16_e32 v2, v0
 ; GFX900-SDAG-NEXT:    v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
 ; GFX900-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3a2784bc, v2
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v2, 0x40549000, v2
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v5, 0x3a2784bc, v0
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3a2784bc, v1
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549000, v1
-; GFX900-SDAG-NEXT:    v_exp_f32_e32 v4, v4
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v2, 0x40549a78, v2
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549a78, v0
 ; GFX900-SDAG-NEXT:    v_exp_f32_e32 v2, v2
-; GFX900-SDAG-NEXT:    v_exp_f32_e32 v5, v5
 ; GFX900-SDAG-NEXT:    v_exp_f32_e32 v0, v0
-; GFX900-SDAG-NEXT:    v_exp_f32_e32 v3, v3
+; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549a78, v1
 ; GFX900-SDAG-NEXT:    v_exp_f32_e32 v1, v1
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v2, v2, v4
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v5
 ; GFX900-SDAG-NEXT:    v_cvt_f16_f32_e32 v2, v2
 ; GFX900-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX900-SDAG-NEXT:    v_mul_f32_e32 v1, v1, v3
 ; GFX900-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; GFX900-SDAG-NEXT:    v_pack_b32_f16 v0, v2, v0
 ; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -6879,23 +6765,14 @@ define <3 x half> @v_exp10_v3f16(<3 x half> %in) {
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; SI-SDAG-NEXT:    v_mul_f32_e32 v3, 0x3a2784bc, v0
-; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549000, v0
-; SI-SDAG-NEXT:    v_exp_f32_e32 v3, v3
+; SI-SDAG-NEXT:    v_mul_f32_e32 v0, 0x40549a78, v0
+; SI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549a78, v1
+; SI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x40549a78, v2
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v0, v0
-; SI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3a2784bc, v1
-; SI-SDAG-NEXT:    v_mul_f32_e32 v1, 0x40549000, v1
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v1, v1
-; SI-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v3
-; SI-SDAG-NEXT:    v_exp_f32_e32 v3, v4
-; SI-SDAG-NEXT:    v_mul_f32_e32 v4, 0x3a2784bc, v2
-; SI-SDAG-NEXT:    v_mul_f32_e32 v2, 0x40549000, v2
-; SI-SDAG-NEXT:    v_exp_f32_e32 v4, v4
 ; SI-SDAG-NEXT:    v_exp_f32_e32 v2, v2
-; SI-SDAG-NEXT:    v_mul_f32_e32 v1, v1, v3
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; SI-SDAG-NEXT:    v_mul_f32_e32 v2, v2, v4
 ; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v2, v2
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; SI-SDAG-NEXT:    v_cvt_f32_f16_e32 v1, v1



More information about the llvm-commits mailing list