[llvm] e09b359 - AMDGPU: Fix fast math log2 f32
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 15 07:48:52 PDT 2023
Author: Matt Arsenault
Date: 2023-08-15T10:48:46-04:00
New Revision: e09b3593ba64d004a9d2b3fa41be2ba84f968a88
URL: https://github.com/llvm/llvm-project/commit/e09b3593ba64d004a9d2b3fa41be2ba84f968a88
DIFF: https://github.com/llvm/llvm-project/commit/e09b3593ba64d004a9d2b3fa41be2ba84f968a88.diff
LOG: AMDGPU: Fix fast math log2 f32
Apparently, according to OpenCL conformance, the afn flag does not permit
dropping the denormal handling. This was previously hidden because the flags
were lost during the library linking process. Fast log is still broken and
needs more work.
https://reviews.llvm.org/D157936
Added:
Modified:
llvm/docs/AMDGPUUsage.rst
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/test/CodeGen/AMDGPU/llvm.log2.ll
Removed:
################################################################################
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 1c0a949ebc2baa..11f0e92eb87850 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -980,8 +980,7 @@ The AMDGPU backend implements the following LLVM IR intrinsics.
half). Not implemented for double. Hardware provides
1ULP accuracy for float, and 0.51ULP for half. Float
instruction does not natively support denormal
- inputs. Backend will optimize out denormal scaling if
- marked with the :ref:`afn <fastmath_afn>` flag.
+ inputs.
:ref:`llvm.sqrt <int_sqrt>` Implemented for double, float and half (and vectors).
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index ef8da1e35e924e..dd2ef82cf92b09 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2529,7 +2529,7 @@ SDValue AMDGPUTargetLowering::getIsFinite(SelectionDAG &DAG, SDValue Src,
std::pair<SDValue, SDValue>
AMDGPUTargetLowering::getScaledLogInput(SelectionDAG &DAG, const SDLoc SL,
SDValue Src, SDNodeFlags Flags) const {
- if (allowApproxFunc(DAG, Flags) || !needsDenormHandlingF32(DAG, Src, Flags))
+ if (!needsDenormHandlingF32(DAG, Src, Flags))
return {};
MVT VT = MVT::f32;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index d7e2d15f98a385..ca6ea1504c4b87 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -3043,8 +3043,7 @@ static bool needsDenormHandlingF32(const MachineFunction &MF, Register Src,
std::pair<Register, Register>
AMDGPULegalizerInfo::getScaledLogInput(MachineIRBuilder &B, Register Src,
unsigned Flags) const {
- if (allowApproxFunc(B.getMF(), Flags) ||
- !needsDenormHandlingF32(B.getMF(), Src, Flags))
+ if (!needsDenormHandlingF32(B.getMF(), Src, Flags))
return {};
const LLT F32 = LLT::scalar(32);
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll
index f5e30d654b0552..6485f13d0c89e9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll
@@ -1511,17 +1511,59 @@ define float @v_log2_fneg_f32(float %in) {
}
define float @v_log2_f32_fast(float %in) {
-; GFX689-LABEL: v_log2_f32_fast:
-; GFX689: ; %bb.0:
-; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT: v_log_f32_e32 v0, v0
-; GFX689-NEXT: s_setpc_b64 s[30:31]
+; GFX689-SDAG-LABEL: v_log2_f32_fast:
+; GFX689-SDAG: ; %bb.0:
+; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: v_log2_f32_fast:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX689-GISEL-LABEL: v_log2_f32_fast:
+; GFX689-GISEL: ; %bb.0:
+; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log2_f32_fast:
+; GFX1100-SDAG: ; %bb.0:
+; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log2_f32_fast:
+; GFX1100-GISEL: ; %bb.0:
+; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log2_f32_fast:
; R600: ; %bb.0:
@@ -1537,17 +1579,59 @@ define float @v_log2_f32_fast(float %in) {
}
define float @v_log2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
-; GFX689-LABEL: v_log2_f32_unsafe_math_attr:
-; GFX689: ; %bb.0:
-; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT: v_log_f32_e32 v0, v0
-; GFX689-NEXT: s_setpc_b64 s[30:31]
+; GFX689-SDAG-LABEL: v_log2_f32_unsafe_math_attr:
+; GFX689-SDAG: ; %bb.0:
+; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: v_log2_f32_unsafe_math_attr:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX689-GISEL-LABEL: v_log2_f32_unsafe_math_attr:
+; GFX689-GISEL: ; %bb.0:
+; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log2_f32_unsafe_math_attr:
+; GFX1100-SDAG: ; %bb.0:
+; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log2_f32_unsafe_math_attr:
+; GFX1100-GISEL: ; %bb.0:
+; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log2_f32_unsafe_math_attr:
; R600: ; %bb.0:
@@ -1563,17 +1647,59 @@ define float @v_log2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
}
define float @v_log2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" {
-; GFX689-LABEL: v_log2_f32_approx_fn_attr:
-; GFX689: ; %bb.0:
-; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT: v_log_f32_e32 v0, v0
-; GFX689-NEXT: s_setpc_b64 s[30:31]
+; GFX689-SDAG-LABEL: v_log2_f32_approx_fn_attr:
+; GFX689-SDAG: ; %bb.0:
+; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: v_log2_f32_approx_fn_attr:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX689-GISEL-LABEL: v_log2_f32_approx_fn_attr:
+; GFX689-GISEL: ; %bb.0:
+; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log2_f32_approx_fn_attr:
+; GFX1100-SDAG: ; %bb.0:
+; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log2_f32_approx_fn_attr:
+; GFX1100-GISEL: ; %bb.0:
+; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log2_f32_approx_fn_attr:
; R600: ; %bb.0:
@@ -1657,17 +1783,59 @@ define float @v_log2_f32_ninf(float %in) {
}
define float @v_log2_f32_afn(float %in) {
-; GFX689-LABEL: v_log2_f32_afn:
-; GFX689: ; %bb.0:
-; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT: v_log_f32_e32 v0, v0
-; GFX689-NEXT: s_setpc_b64 s[30:31]
+; GFX689-SDAG-LABEL: v_log2_f32_afn:
+; GFX689-SDAG: ; %bb.0:
+; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: v_log2_f32_afn:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX689-GISEL-LABEL: v_log2_f32_afn:
+; GFX689-GISEL: ; %bb.0:
+; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log2_f32_afn:
+; GFX1100-SDAG: ; %bb.0:
+; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log2_f32_afn:
+; GFX1100-GISEL: ; %bb.0:
+; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log2_f32_afn:
; R600: ; %bb.0:
@@ -1709,17 +1877,59 @@ define float @v_log2_f32_afn_daz(float %in) #0 {
}
define float @v_log2_f32_afn_dynamic(float %in) #1 {
-; GFX689-LABEL: v_log2_f32_afn_dynamic:
-; GFX689: ; %bb.0:
-; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT: v_log_f32_e32 v0, v0
-; GFX689-NEXT: s_setpc_b64 s[30:31]
+; GFX689-SDAG-LABEL: v_log2_f32_afn_dynamic:
+; GFX689-SDAG: ; %bb.0:
+; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: v_log2_f32_afn_dynamic:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX689-GISEL-LABEL: v_log2_f32_afn_dynamic:
+; GFX689-GISEL: ; %bb.0:
+; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log2_f32_afn_dynamic:
+; GFX1100-SDAG: ; %bb.0:
+; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log2_f32_afn_dynamic:
+; GFX1100-GISEL: ; %bb.0:
+; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log2_f32_afn_dynamic:
; R600: ; %bb.0:
@@ -1735,17 +1945,61 @@ define float @v_log2_f32_afn_dynamic(float %in) #1 {
}
define float @v_fabs_log2_f32_afn(float %in) {
-; GFX689-LABEL: v_fabs_log2_f32_afn:
-; GFX689: ; %bb.0:
-; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT: v_log_f32_e64 v0, |v0|
-; GFX689-NEXT: s_setpc_b64 s[30:31]
+; GFX689-SDAG-LABEL: v_fabs_log2_f32_afn:
+; GFX689-SDAG: ; %bb.0:
+; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX689-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
+; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; GFX689-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2
+; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: v_fabs_log2_f32_afn:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_log_f32_e64 v0, |v0|
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; GFX689-GISEL-LABEL: v_fabs_log2_f32_afn:
+; GFX689-GISEL: ; %bb.0:
+; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX689-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc
+; GFX689-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
+; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
+; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_fabs_log2_f32_afn:
+; GFX1100-SDAG: ; %bb.0:
+; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0
+; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_fabs_log2_f32_afn:
+; GFX1100-GISEL: ; %bb.0:
+; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0
+; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_fabs_log2_f32_afn:
; R600: ; %bb.0:
More information about the llvm-commits
mailing list