[llvm] [AMDGPU] Do not replace SALU floating point multiply with VALU-only ldexp (PR #145048)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 20 08:08:17 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Jay Foad (jayfoad)

<details>
<summary>Changes</summary>



---
Full diff: https://github.com/llvm/llvm-project/pull/145048.diff


2 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+6) 
- (modified) llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll (+56-36) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 07d79d677104a..3281eabcd4adb 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -15189,6 +15189,12 @@ SDValue SITargetLowering::performFMulCombine(SDNode *N,
   EVT ScalarVT = VT.getScalarType();
   EVT IntVT = VT.changeElementType(MVT::i32);
 
+  if (!N->isDivergent() && getSubtarget()->hasSALUFloatInsts() &&
+      (ScalarVT == MVT::f32 || ScalarVT == MVT::f16)) {
+    // Prefer to use s_mul_f16/f32 instead of v_ldexp_f16/f32.
+    return SDValue();
+  }
+
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
 
diff --git a/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll b/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll
index 733fe9317ddcc..a2e7f2e62f5dd 100644
--- a/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll
+++ b/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll
@@ -2,19 +2,34 @@
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
 
+; TODO: GlobalISel should avoid generating v_ldexp_f32.
 define amdgpu_cs float @v_s_exp_f32(float inreg %src) {
-; GFX12-LABEL: v_s_exp_f32:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_cmp_lt_f32 s0, 0xc2fc0000
-; GFX12-NEXT:    s_cselect_b32 s1, 0x42800000, 0
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
-; GFX12-NEXT:    s_add_f32 s0, s0, s1
-; GFX12-NEXT:    s_cselect_b32 s1, 0xffffffc0, 0
-; GFX12-NEXT:    v_s_exp_f32 s0, s0
-; GFX12-NEXT:    s_wait_alu 0xf1ff
-; GFX12-NEXT:    s_delay_alu instid0(TRANS32_DEP_1)
-; GFX12-NEXT:    v_ldexp_f32 v0, s0, s1
-; GFX12-NEXT:    ; return to shader part epilog
+; GFX12-SDAG-LABEL: v_s_exp_f32:
+; GFX12-SDAG:       ; %bb.0:
+; GFX12-SDAG-NEXT:    s_cmp_lt_f32 s0, 0xc2fc0000
+; GFX12-SDAG-NEXT:    s_cselect_b32 s1, 0x42800000, 0
+; GFX12-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
+; GFX12-SDAG-NEXT:    s_add_f32 s0, s0, s1
+; GFX12-SDAG-NEXT:    s_cselect_b32 s1, 0x1f800000, 1.0
+; GFX12-SDAG-NEXT:    v_s_exp_f32 s0, s0
+; GFX12-SDAG-NEXT:    s_delay_alu instid0(TRANS32_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
+; GFX12-SDAG-NEXT:    s_mul_f32 s0, s0, s1
+; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
+; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; GFX12-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX12-GISEL-LABEL: v_s_exp_f32:
+; GFX12-GISEL:       ; %bb.0:
+; GFX12-GISEL-NEXT:    s_cmp_lt_f32 s0, 0xc2fc0000
+; GFX12-GISEL-NEXT:    s_cselect_b32 s1, 0x42800000, 0
+; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
+; GFX12-GISEL-NEXT:    s_add_f32 s0, s0, s1
+; GFX12-GISEL-NEXT:    s_cselect_b32 s1, 0xffffffc0, 0
+; GFX12-GISEL-NEXT:    v_s_exp_f32 s0, s0
+; GFX12-GISEL-NEXT:    s_wait_alu 0xf1ff
+; GFX12-GISEL-NEXT:    s_delay_alu instid0(TRANS32_DEP_1)
+; GFX12-GISEL-NEXT:    v_ldexp_f32 v0, s0, s1
+; GFX12-GISEL-NEXT:    ; return to shader part epilog
   %result = call float @llvm.exp2.f32(float %src)
   ret float %result
 }
@@ -59,14 +74,15 @@ define amdgpu_cs float @v_s_log_f32(float inreg %src) {
 ; GFX12-SDAG-LABEL: v_s_log_f32:
 ; GFX12-SDAG:       ; %bb.0:
 ; GFX12-SDAG-NEXT:    s_cmp_lt_f32 s0, 0x800000
-; GFX12-SDAG-NEXT:    s_cselect_b32 s1, 32, 0
-; GFX12-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX12-SDAG-NEXT:    v_ldexp_f32 v0, s0, s1
-; GFX12-SDAG-NEXT:    s_cselect_b32 s0, 0x42000000, 0
-; GFX12-SDAG-NEXT:    v_log_f32_e32 v0, v0
+; GFX12-SDAG-NEXT:    s_cselect_b32 s1, 0x4f800000, 1.0
+; GFX12-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
+; GFX12-SDAG-NEXT:    s_mul_f32 s0, s0, s1
+; GFX12-SDAG-NEXT:    s_cselect_b32 s1, 0x42000000, 0
+; GFX12-SDAG-NEXT:    v_s_log_f32 s0, s0
+; GFX12-SDAG-NEXT:    s_delay_alu instid0(TRANS32_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
+; GFX12-SDAG-NEXT:    s_sub_f32 s0, s0, s1
 ; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
-; GFX12-SDAG-NEXT:    s_delay_alu instid0(TRANS32_DEP_1)
-; GFX12-SDAG-NEXT:    v_subrev_f32_e32 v0, s0, v0
+; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX12-SDAG-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-GISEL-LABEL: v_s_log_f32:
@@ -147,7 +163,7 @@ define amdgpu_cs half @v_s_rcp_f16(half inreg %src) {
   ret half %result
 }
 
-; TODO-GFX12: GlobalISel should generate v_s_rsq.
+; TODO: GlobalISel should generate v_s_rsq.
 define amdgpu_cs float @v_s_rsq_f32(float inreg %src) {
 ; GFX12-SDAG-LABEL: v_s_rsq_f32:
 ; GFX12-SDAG:       ; %bb.0:
@@ -184,7 +200,7 @@ define amdgpu_cs half @v_s_rsq_f16(half inreg %src) {
   ret half %result
 }
 
-; TODO-GFX12: Should not use any VALU instructions.
+; TODO: Should avoid generating v_cmp_class_f32.
 define amdgpu_cs float @v_s_sqrt_f32(float inreg %src) {
 ; GFX12-SDAG-LABEL: v_s_sqrt_f32:
 ; GFX12-SDAG:       ; %bb.0:
@@ -298,16 +314,18 @@ define amdgpu_cs half @v_amdgcn_sqrt_f16(half inreg %src)  {
 define amdgpu_cs float @srcmods_abs_f32(float inreg %src) {
 ; GFX12-SDAG-LABEL: srcmods_abs_f32:
 ; GFX12-SDAG:       ; %bb.0:
-; GFX12-SDAG-NEXT:    s_and_b32 s1, s0, 0x7fffffff
+; GFX12-SDAG-NEXT:    s_bitset0_b32 s0, 31
 ; GFX12-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX12-SDAG-NEXT:    s_cmp_lt_f32 s1, 0x800000
-; GFX12-SDAG-NEXT:    s_cselect_b32 s1, 32, 0
-; GFX12-SDAG-NEXT:    v_ldexp_f32 v0, |s0|, s1
-; GFX12-SDAG-NEXT:    s_cselect_b32 s0, 0x42000000, 0
-; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
-; GFX12-SDAG-NEXT:    v_log_f32_e32 v0, v0
+; GFX12-SDAG-NEXT:    s_cmp_lt_f32 s0, 0x800000
+; GFX12-SDAG-NEXT:    s_cselect_b32 s1, 0x4f800000, 1.0
+; GFX12-SDAG-NEXT:    s_mul_f32 s0, s0, s1
+; GFX12-SDAG-NEXT:    s_cselect_b32 s1, 0x42000000, 0
+; GFX12-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_2) | instskip(NEXT) | instid1(TRANS32_DEP_1)
+; GFX12-SDAG-NEXT:    v_s_log_f32 s0, s0
+; GFX12-SDAG-NEXT:    s_sub_f32 s0, s0, s1
 ; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
-; GFX12-SDAG-NEXT:    v_subrev_f32_e32 v0, s0, v0
+; GFX12-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_2)
+; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX12-SDAG-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-GISEL-LABEL: srcmods_abs_f32:
@@ -333,15 +351,17 @@ define amdgpu_cs float @srcmods_abs_f32(float inreg %src) {
 define amdgpu_cs float @srcmods_neg_f32(float inreg %src) {
 ; GFX12-SDAG-LABEL: srcmods_neg_f32:
 ; GFX12-SDAG:       ; %bb.0:
+; GFX12-SDAG-NEXT:    s_xor_b32 s1, s0, 0x80000000
 ; GFX12-SDAG-NEXT:    s_cmp_gt_f32 s0, 0x80800000
-; GFX12-SDAG-NEXT:    s_cselect_b32 s1, 32, 0
-; GFX12-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX12-SDAG-NEXT:    v_ldexp_f32 v0, -s0, s1
-; GFX12-SDAG-NEXT:    s_cselect_b32 s0, 0x42000000, 0
-; GFX12-SDAG-NEXT:    v_log_f32_e32 v0, v0
+; GFX12-SDAG-NEXT:    s_cselect_b32 s0, 0x4f800000, 1.0
+; GFX12-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
+; GFX12-SDAG-NEXT:    s_mul_f32 s0, s1, s0
+; GFX12-SDAG-NEXT:    s_cselect_b32 s1, 0x42000000, 0
+; GFX12-SDAG-NEXT:    v_s_log_f32 s0, s0
+; GFX12-SDAG-NEXT:    s_delay_alu instid0(TRANS32_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
+; GFX12-SDAG-NEXT:    s_sub_f32 s0, s0, s1
 ; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
-; GFX12-SDAG-NEXT:    s_delay_alu instid0(TRANS32_DEP_1)
-; GFX12-SDAG-NEXT:    v_subrev_f32_e32 v0, s0, v0
+; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX12-SDAG-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-GISEL-LABEL: srcmods_neg_f32:

``````````

</details>


https://github.com/llvm/llvm-project/pull/145048


More information about the llvm-commits mailing list