[llvm-branch-commits] [llvm] 1ba6abc - AMDGPU: Fix fast math log2 f32

Tobias Hieta via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Aug 21 01:08:16 PDT 2023


Author: Matt Arsenault
Date: 2023-08-21T09:54:24+02:00
New Revision: 1ba6abce1f55323c065d7ec3ef1700c53dffa862

URL: https://github.com/llvm/llvm-project/commit/1ba6abce1f55323c065d7ec3ef1700c53dffa862
DIFF: https://github.com/llvm/llvm-project/commit/1ba6abce1f55323c065d7ec3ef1700c53dffa862.diff

LOG: AMDGPU: Fix fast math log2 f32

Apparently, according to OpenCL conformance, the afn flag does not
permit dropping the denormal input handling for f32 log2. This was
previously hidden because the flags were lost during the library
linking process. Fast log is still broken and needs more work.

https://reviews.llvm.org/D157936
(cherry picked from commit e09b3593ba64d004a9d2b3fa41be2ba84f968a88)

Added: 
    

Modified: 
    llvm/docs/AMDGPUUsage.rst
    llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
    llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
    llvm/test/CodeGen/AMDGPU/llvm.log2.ll

Removed: 
    


################################################################################
diff  --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 0a7ae20e9b1c8c..7c749e225c5991 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -980,8 +980,7 @@ The AMDGPU backend implements the following LLVM IR intrinsics.
                                              half). Not implemented for double. Hardware provides
                                              1ULP accuracy for float, and 0.51ULP for half. Float
                                              instruction does not natively support denormal
-                                             inputs. Backend will optimize out denormal scaling if
-                                             marked with the :ref:`afn <fastmath_afn>` flag.
+                                             inputs.
 
   :ref:`llvm.sqrt <int_sqrt>`                Implemented for double, float and half (and vectors).
 

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 254d02d4ce5bad..fc82fb620142f3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2528,7 +2528,7 @@ SDValue AMDGPUTargetLowering::getIsFinite(SelectionDAG &DAG, SDValue Src,
 std::pair<SDValue, SDValue>
 AMDGPUTargetLowering::getScaledLogInput(SelectionDAG &DAG, const SDLoc SL,
                                         SDValue Src, SDNodeFlags Flags) const {
-  if (allowApproxFunc(DAG, Flags) || !needsDenormHandlingF32(DAG, Src, Flags))
+  if (!needsDenormHandlingF32(DAG, Src, Flags))
     return {};
 
   MVT VT = MVT::f32;

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 120c00b14a3693..9325b14e7cc5f2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -3037,8 +3037,7 @@ static bool needsDenormHandlingF32(const MachineFunction &MF, Register Src,
 std::pair<Register, Register>
 AMDGPULegalizerInfo::getScaledLogInput(MachineIRBuilder &B, Register Src,
                                        unsigned Flags) const {
-  if (allowApproxFunc(B.getMF(), Flags) ||
-      !needsDenormHandlingF32(B.getMF(), Src, Flags))
+  if (!needsDenormHandlingF32(B.getMF(), Src, Flags))
     return {};
 
   const LLT F32 = LLT::scalar(32);

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll
index f5e30d654b0552..6485f13d0c89e9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll
@@ -1511,17 +1511,59 @@ define float @v_log2_fneg_f32(float %in) {
 }
 
 define float @v_log2_f32_fast(float %in) {
-; GFX689-LABEL: v_log2_f32_fast:
-; GFX689:       ; %bb.0:
-; GFX689-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT:    v_log_f32_e32 v0, v0
-; GFX689-NEXT:    s_setpc_b64 s[30:31]
+; GFX689-SDAG-LABEL: v_log2_f32_fast:
+; GFX689-SDAG:       ; %bb.0:
+; GFX689-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-SDAG-NEXT:    s_mov_b32 s4, 0x800000
+; GFX689-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX689-SDAG-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX689-SDAG-NEXT:    v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; GFX689-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
+; GFX689-SDAG-NEXT:    v_log_f32_e32 v0, v0
+; GFX689-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
+; GFX689-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX689-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1100-LABEL: v_log2_f32_fast:
-; GFX1100:       ; %bb.0:
-; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT:    v_log_f32_e32 v0, v0
-; GFX1100-NEXT:    s_setpc_b64 s[30:31]
+; GFX689-GISEL-LABEL: v_log2_f32_fast:
+; GFX689-GISEL:       ; %bb.0:
+; GFX689-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v1, 0x800000
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX689-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX689-GISEL-NEXT:    v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX689-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT:    v_log_f32_e32 v0, v0
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v1, 0x42000000
+; GFX689-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log2_f32_fast:
+; GFX1100-SDAG:       ; %bb.0:
+; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log2_f32_fast:
+; GFX1100-GISEL:       ; %bb.0:
+; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-GISEL-NEXT:    v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_log2_f32_fast:
 ; R600:       ; %bb.0:
@@ -1537,17 +1579,59 @@ define float @v_log2_f32_fast(float %in) {
 }
 
 define float @v_log2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
-; GFX689-LABEL: v_log2_f32_unsafe_math_attr:
-; GFX689:       ; %bb.0:
-; GFX689-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT:    v_log_f32_e32 v0, v0
-; GFX689-NEXT:    s_setpc_b64 s[30:31]
+; GFX689-SDAG-LABEL: v_log2_f32_unsafe_math_attr:
+; GFX689-SDAG:       ; %bb.0:
+; GFX689-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-SDAG-NEXT:    s_mov_b32 s4, 0x800000
+; GFX689-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX689-SDAG-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX689-SDAG-NEXT:    v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; GFX689-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
+; GFX689-SDAG-NEXT:    v_log_f32_e32 v0, v0
+; GFX689-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
+; GFX689-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX689-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1100-LABEL: v_log2_f32_unsafe_math_attr:
-; GFX1100:       ; %bb.0:
-; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT:    v_log_f32_e32 v0, v0
-; GFX1100-NEXT:    s_setpc_b64 s[30:31]
+; GFX689-GISEL-LABEL: v_log2_f32_unsafe_math_attr:
+; GFX689-GISEL:       ; %bb.0:
+; GFX689-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v1, 0x800000
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX689-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX689-GISEL-NEXT:    v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX689-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT:    v_log_f32_e32 v0, v0
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v1, 0x42000000
+; GFX689-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log2_f32_unsafe_math_attr:
+; GFX1100-SDAG:       ; %bb.0:
+; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log2_f32_unsafe_math_attr:
+; GFX1100-GISEL:       ; %bb.0:
+; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-GISEL-NEXT:    v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_log2_f32_unsafe_math_attr:
 ; R600:       ; %bb.0:
@@ -1563,17 +1647,59 @@ define float @v_log2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
 }
 
 define float @v_log2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" {
-; GFX689-LABEL: v_log2_f32_approx_fn_attr:
-; GFX689:       ; %bb.0:
-; GFX689-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT:    v_log_f32_e32 v0, v0
-; GFX689-NEXT:    s_setpc_b64 s[30:31]
+; GFX689-SDAG-LABEL: v_log2_f32_approx_fn_attr:
+; GFX689-SDAG:       ; %bb.0:
+; GFX689-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-SDAG-NEXT:    s_mov_b32 s4, 0x800000
+; GFX689-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX689-SDAG-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX689-SDAG-NEXT:    v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; GFX689-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
+; GFX689-SDAG-NEXT:    v_log_f32_e32 v0, v0
+; GFX689-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
+; GFX689-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX689-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1100-LABEL: v_log2_f32_approx_fn_attr:
-; GFX1100:       ; %bb.0:
-; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT:    v_log_f32_e32 v0, v0
-; GFX1100-NEXT:    s_setpc_b64 s[30:31]
+; GFX689-GISEL-LABEL: v_log2_f32_approx_fn_attr:
+; GFX689-GISEL:       ; %bb.0:
+; GFX689-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v1, 0x800000
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX689-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX689-GISEL-NEXT:    v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX689-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT:    v_log_f32_e32 v0, v0
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v1, 0x42000000
+; GFX689-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log2_f32_approx_fn_attr:
+; GFX1100-SDAG:       ; %bb.0:
+; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log2_f32_approx_fn_attr:
+; GFX1100-GISEL:       ; %bb.0:
+; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-GISEL-NEXT:    v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_log2_f32_approx_fn_attr:
 ; R600:       ; %bb.0:
@@ -1657,17 +1783,59 @@ define float @v_log2_f32_ninf(float %in) {
 }
 
 define float @v_log2_f32_afn(float %in) {
-; GFX689-LABEL: v_log2_f32_afn:
-; GFX689:       ; %bb.0:
-; GFX689-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT:    v_log_f32_e32 v0, v0
-; GFX689-NEXT:    s_setpc_b64 s[30:31]
+; GFX689-SDAG-LABEL: v_log2_f32_afn:
+; GFX689-SDAG:       ; %bb.0:
+; GFX689-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-SDAG-NEXT:    s_mov_b32 s4, 0x800000
+; GFX689-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX689-SDAG-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX689-SDAG-NEXT:    v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; GFX689-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
+; GFX689-SDAG-NEXT:    v_log_f32_e32 v0, v0
+; GFX689-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
+; GFX689-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX689-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1100-LABEL: v_log2_f32_afn:
-; GFX1100:       ; %bb.0:
-; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT:    v_log_f32_e32 v0, v0
-; GFX1100-NEXT:    s_setpc_b64 s[30:31]
+; GFX689-GISEL-LABEL: v_log2_f32_afn:
+; GFX689-GISEL:       ; %bb.0:
+; GFX689-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v1, 0x800000
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX689-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX689-GISEL-NEXT:    v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX689-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT:    v_log_f32_e32 v0, v0
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v1, 0x42000000
+; GFX689-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log2_f32_afn:
+; GFX1100-SDAG:       ; %bb.0:
+; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log2_f32_afn:
+; GFX1100-GISEL:       ; %bb.0:
+; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-GISEL-NEXT:    v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_log2_f32_afn:
 ; R600:       ; %bb.0:
@@ -1709,17 +1877,59 @@ define float @v_log2_f32_afn_daz(float %in) #0 {
 }
 
 define float @v_log2_f32_afn_dynamic(float %in) #1 {
-; GFX689-LABEL: v_log2_f32_afn_dynamic:
-; GFX689:       ; %bb.0:
-; GFX689-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT:    v_log_f32_e32 v0, v0
-; GFX689-NEXT:    s_setpc_b64 s[30:31]
+; GFX689-SDAG-LABEL: v_log2_f32_afn_dynamic:
+; GFX689-SDAG:       ; %bb.0:
+; GFX689-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-SDAG-NEXT:    s_mov_b32 s4, 0x800000
+; GFX689-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX689-SDAG-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX689-SDAG-NEXT:    v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; GFX689-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
+; GFX689-SDAG-NEXT:    v_log_f32_e32 v0, v0
+; GFX689-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
+; GFX689-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX689-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1100-LABEL: v_log2_f32_afn_dynamic:
-; GFX1100:       ; %bb.0:
-; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT:    v_log_f32_e32 v0, v0
-; GFX1100-NEXT:    s_setpc_b64 s[30:31]
+; GFX689-GISEL-LABEL: v_log2_f32_afn_dynamic:
+; GFX689-GISEL:       ; %bb.0:
+; GFX689-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v1, 0x800000
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX689-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX689-GISEL-NEXT:    v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX689-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT:    v_log_f32_e32 v0, v0
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v1, 0x42000000
+; GFX689-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log2_f32_afn_dynamic:
+; GFX1100-SDAG:       ; %bb.0:
+; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log2_f32_afn_dynamic:
+; GFX1100-GISEL:       ; %bb.0:
+; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-GISEL-NEXT:    v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_log2_f32_afn_dynamic:
 ; R600:       ; %bb.0:
@@ -1735,17 +1945,61 @@ define float @v_log2_f32_afn_dynamic(float %in) #1 {
 }
 
 define float @v_fabs_log2_f32_afn(float %in) {
-; GFX689-LABEL: v_fabs_log2_f32_afn:
-; GFX689:       ; %bb.0:
-; GFX689-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT:    v_log_f32_e64 v0, |v0|
-; GFX689-NEXT:    s_setpc_b64 s[30:31]
+; GFX689-SDAG-LABEL: v_fabs_log2_f32_afn:
+; GFX689-SDAG:       ; %bb.0:
+; GFX689-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-SDAG-NEXT:    s_mov_b32 s4, 0x800000
+; GFX689-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
+; GFX689-SDAG-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX689-SDAG-NEXT:    v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; GFX689-SDAG-NEXT:    v_mul_f32_e64 v0, |v0|, v2
+; GFX689-SDAG-NEXT:    v_log_f32_e32 v0, v0
+; GFX689-SDAG-NEXT:    v_mov_b32_e32 v1, 0x42000000
+; GFX689-SDAG-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX689-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1100-LABEL: v_fabs_log2_f32_afn:
-; GFX1100:       ; %bb.0:
-; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT:    v_log_f32_e64 v0, |v0|
-; GFX1100-NEXT:    s_setpc_b64 s[30:31]
+; GFX689-GISEL-LABEL: v_fabs_log2_f32_afn:
+; GFX689-GISEL:       ; %bb.0:
+; GFX689-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v1, 0x800000
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v2, 0x4f800000
+; GFX689-GISEL-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, v1
+; GFX689-GISEL-NEXT:    v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX689-GISEL-NEXT:    v_mul_f32_e64 v0, |v0|, v1
+; GFX689-GISEL-NEXT:    v_log_f32_e32 v0, v0
+; GFX689-GISEL-NEXT:    v_mov_b32_e32 v1, 0x42000000
+; GFX689-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_fabs_log2_f32_afn:
+; GFX1100-SDAG:       ; %bb.0:
+; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x800000, |v0|
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0
+; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, s0
+; GFX1100-SDAG-NEXT:    v_mul_f32_e64 v0, |v0|, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_fabs_log2_f32_afn:
+; GFX1100-GISEL:       ; %bb.0:
+; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT:    v_cmp_gt_f32_e64 s0, 0x800000, |v0|
+; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
+; GFX1100-GISEL-NEXT:    v_mul_f32_e64 v0, |v0|, v1
+; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, s0
+; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX1100-GISEL-NEXT:    v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_fabs_log2_f32_afn:
 ; R600:       ; %bb.0:


        


More information about the llvm-branch-commits mailing list