[llvm] 81b278e - AMDGPU: Fix fast f32 exp2

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 15 07:48:56 PDT 2023


Author: Matt Arsenault
Date: 2023-08-15T10:48:46-04:00
New Revision: 81b278e613fb3bcdf8053f7a6b5dd2f3881898d2

URL: https://github.com/llvm/llvm-project/commit/81b278e613fb3bcdf8053f7a6b5dd2f3881898d2
DIFF: https://github.com/llvm/llvm-project/commit/81b278e613fb3bcdf8053f7a6b5dd2f3881898d2.diff

LOG: AMDGPU: Fix fast f32 exp2

Mirror of the previous log changes, OpenCL conformance doesn't like
interpreting afn as ignore denormal handling but was previously hidden
by flag dropping.

Added: 
    llvm/test/CodeGen/AMDGPU/input-mods.r600.ll

Modified: 
    llvm/docs/AMDGPUUsage.rst
    llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
    llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
    llvm/test/CodeGen/AMDGPU/llvm.exp2.ll

Removed: 
    llvm/test/CodeGen/AMDGPU/input-mods.ll


################################################################################
diff  --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 11f0e92eb87850..c0290d10871e03 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -994,8 +994,7 @@ The AMDGPU backend implements the following LLVM IR intrinsics.
                                                    half). Not implemented for double. Hardware provides
                                                    1ULP accuracy for float, and 0.51ULP for half. Float
                                                    instruction does not natively support denormal
-                                                   inputs. Backend will optimize out denormal scaling if
-                                                   marked with the :ref:`afn <fastmath_afn>` flag.
+                                                   inputs.
 
   :ref:`llvm.stacksave.p5 <int_stacksave>`         Implemented, must use the alloca address space.
   :ref:`llvm.stackrestore.p5 <int_stackrestore>`   Implemented, must use the alloca address space.

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 1ef865bde9e482..39744bf7c0f2d6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2752,7 +2752,7 @@ SDValue AMDGPUTargetLowering::lowerFEXP2(SDValue Op, SelectionDAG &DAG) const {
 
   assert(VT == MVT::f32);
 
-  if (allowApproxFunc(DAG, Flags) || !needsDenormHandlingF32(DAG, Src, Flags))
+  if (!needsDenormHandlingF32(DAG, Src, Flags))
     return DAG.getNode(AMDGPUISD::EXP, SL, MVT::f32, Src, Flags);
 
   // bool needs_scaling = x < -0x1.f80000p+6f;

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 08a15cd578d02b..08869abb0ea522 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -3291,8 +3291,7 @@ bool AMDGPULegalizerInfo::legalizeFExp2(MachineInstr &MI,
 
   assert(Ty == F32);
 
-  if (allowApproxFunc(B.getMF(), Flags) ||
-      !needsDenormHandlingF32(B.getMF(), Src, Flags)) {
+  if (!needsDenormHandlingF32(B.getMF(), Src, Flags)) {
     B.buildIntrinsic(Intrinsic::amdgcn_exp2, ArrayRef<Register>{Dst})
         .addUse(Src)
         .setMIFlags(Flags);

diff  --git a/llvm/test/CodeGen/AMDGPU/input-mods.ll b/llvm/test/CodeGen/AMDGPU/input-mods.ll
deleted file mode 100644
index 760507f7b9556a..00000000000000
--- a/llvm/test/CodeGen/AMDGPU/input-mods.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG
-;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM
-
-;EG-LABEL: {{^}}test:
-;EG: EXP_IEEE *
-;CM-LABEL: {{^}}test:
-;CM: EXP_IEEE T{{[0-9]+}}.X, -|T{{[0-9]+}}.X|
-;CM: EXP_IEEE T{{[0-9]+}}.Y (MASKED), -|T{{[0-9]+}}.X|
-;CM: EXP_IEEE T{{[0-9]+}}.Z (MASKED), -|T{{[0-9]+}}.X|
-;CM: EXP_IEEE * T{{[0-9]+}}.W (MASKED), -|T{{[0-9]+}}.X|
-
-define amdgpu_ps void @test(<4 x float> inreg %reg0) {
-   %r0 = extractelement <4 x float> %reg0, i32 0
-   %r1 = call float @llvm.fabs.f32(float %r0)
-   %r2 = fsub float -0.000000e+00, %r1
-   %r3 = call afn float @llvm.exp2.f32(float %r2)
-   %vec = insertelement <4 x float> undef, float %r3, i32 0
-   call void @llvm.r600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
-   ret void
-}
-
-declare float @llvm.exp2.f32(float) readnone
-declare float @llvm.fabs.f32(float) readnone
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)

diff  --git a/llvm/test/CodeGen/AMDGPU/input-mods.r600.ll b/llvm/test/CodeGen/AMDGPU/input-mods.r600.ll
new file mode 100644
index 00000000000000..128273410717c8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/input-mods.r600.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG %s
+; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck --check-prefix=CM %s
+
+define amdgpu_ps void @test(<4 x float> inreg %reg0) {
+; EG-LABEL: test:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 8, @4, KC0[], KC1[]
+; EG-NEXT:    EXPORT T0.X___
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    ALU clause starting at 4:
+; EG-NEXT:     SETGT * T0.W, literal.x, -|T0.X|,
+; EG-NEXT:    -1023672320(-1.260000e+02), 0(0.000000e+00)
+; EG-NEXT:     CNDE * T1.W, PV.W, 0.0, literal.x,
+; EG-NEXT:    1115684864(6.400000e+01), 0(0.000000e+00)
+; EG-NEXT:     ADD T1.W, -|T0.X|, PV.W,
+; EG-NEXT:     CNDE * T0.W, T0.W, 1.0, literal.x,
+; EG-NEXT:    528482304(5.421011e-20), 0(0.000000e+00)
+; EG-NEXT:     EXP_IEEE * T0.X, PV.W,
+; EG-NEXT:     MUL_IEEE * T0.X, PS, T0.W,
+;
+; CM-LABEL: test:
+; CM:       ; %bb.0:
+; CM-NEXT:    ALU 11, @4, KC0[], KC1[]
+; CM-NEXT:    EXPORT T0.X___
+; CM-NEXT:    CF_END
+; CM-NEXT:    PAD
+; CM-NEXT:    ALU clause starting at 4:
+; CM-NEXT:     SETGT * T0.W, literal.x, -|T0.X|,
+; CM-NEXT:    -1023672320(-1.260000e+02), 0(0.000000e+00)
+; CM-NEXT:     CNDE * T1.W, PV.W, 0.0, literal.x,
+; CM-NEXT:    1115684864(6.400000e+01), 0(0.000000e+00)
+; CM-NEXT:     CNDE T0.Z, T0.W, 1.0, literal.x,
+; CM-NEXT:     ADD * T0.W, -|T0.X|, PV.W,
+; CM-NEXT:    528482304(5.421011e-20), 0(0.000000e+00)
+; CM-NEXT:     EXP_IEEE T0.X, T0.W,
+; CM-NEXT:     EXP_IEEE T0.Y (MASKED), T0.W,
+; CM-NEXT:     EXP_IEEE T0.Z (MASKED), T0.W,
+; CM-NEXT:     EXP_IEEE * T0.W (MASKED), T0.W,
+; CM-NEXT:     MUL_IEEE * T0.X, PV.X, T0.Z,
+   %r0 = extractelement <4 x float> %reg0, i32 0
+   %r1 = call float @llvm.fabs.f32(float %r0)
+   %r2 = fsub float -0.000000e+00, %r1
+   %r3 = call afn float @llvm.exp2.f32(float %r2)
+   %vec = insertelement <4 x float> undef, float %r3, i32 0
+   call void @llvm.r600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
+   ret void
+}
+
+declare float @llvm.exp2.f32(float) readnone
+declare float @llvm.fabs.f32(float) readnone
+declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll
index 1c42ed67daecc1..fa0c723c64e36e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll
@@ -1168,11 +1168,33 @@ define float @v_exp2_fneg_f32(float %in) {
 }
 
 define float @v_exp2_f32_fast(float %in) {
-; GCN-LABEL: v_exp2_f32_fast:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_exp_f32_e32 v0, v0
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp2_f32_fast:
+; GCN-SDAG:       ; %bb.0:
+; GCN-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT:    s_mov_b32 s4, 0xc2fc0000
+; GCN-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
+; GCN-SDAG-NEXT:    v_mov_b32_e32 v2, 0x42800000
+; GCN-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; GCN-SDAG-NEXT:    v_add_f32_e32 v0, v0, v2
+; GCN-SDAG-NEXT:    v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT:    v_mov_b32_e32 v1, 0x1f800000
+; GCN-SDAG-NEXT:    v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN-GISEL-LABEL: v_exp2_f32_fast:
+; GCN-GISEL:       ; %bb.0:
+; GCN-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT:    v_mov_b32_e32 v1, 0xc2fc0000
+; GCN-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42800000
+; GCN-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
+; GCN-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v2, vcc
+; GCN-GISEL-NEXT:    v_add_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT:    v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT:    v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT:    v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp2_f32_fast:
 ; R600:       ; %bb.0:
@@ -1188,11 +1210,33 @@ define float @v_exp2_f32_fast(float %in) {
 }
 
 define float @v_exp2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
-; GCN-LABEL: v_exp2_f32_unsafe_math_attr:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_exp_f32_e32 v0, v0
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp2_f32_unsafe_math_attr:
+; GCN-SDAG:       ; %bb.0:
+; GCN-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT:    s_mov_b32 s4, 0xc2fc0000
+; GCN-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
+; GCN-SDAG-NEXT:    v_mov_b32_e32 v2, 0x42800000
+; GCN-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; GCN-SDAG-NEXT:    v_add_f32_e32 v0, v0, v2
+; GCN-SDAG-NEXT:    v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT:    v_mov_b32_e32 v1, 0x1f800000
+; GCN-SDAG-NEXT:    v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN-GISEL-LABEL: v_exp2_f32_unsafe_math_attr:
+; GCN-GISEL:       ; %bb.0:
+; GCN-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT:    v_mov_b32_e32 v1, 0xc2fc0000
+; GCN-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42800000
+; GCN-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
+; GCN-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v2, vcc
+; GCN-GISEL-NEXT:    v_add_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT:    v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT:    v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT:    v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp2_f32_unsafe_math_attr:
 ; R600:       ; %bb.0:
@@ -1208,11 +1252,33 @@ define float @v_exp2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
 }
 
 define float @v_exp2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" {
-; GCN-LABEL: v_exp2_f32_approx_fn_attr:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_exp_f32_e32 v0, v0
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp2_f32_approx_fn_attr:
+; GCN-SDAG:       ; %bb.0:
+; GCN-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT:    s_mov_b32 s4, 0xc2fc0000
+; GCN-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
+; GCN-SDAG-NEXT:    v_mov_b32_e32 v2, 0x42800000
+; GCN-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; GCN-SDAG-NEXT:    v_add_f32_e32 v0, v0, v2
+; GCN-SDAG-NEXT:    v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT:    v_mov_b32_e32 v1, 0x1f800000
+; GCN-SDAG-NEXT:    v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN-GISEL-LABEL: v_exp2_f32_approx_fn_attr:
+; GCN-GISEL:       ; %bb.0:
+; GCN-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT:    v_mov_b32_e32 v1, 0xc2fc0000
+; GCN-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42800000
+; GCN-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
+; GCN-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v2, vcc
+; GCN-GISEL-NEXT:    v_add_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT:    v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT:    v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT:    v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp2_f32_approx_fn_attr:
 ; R600:       ; %bb.0:
@@ -1270,11 +1336,33 @@ define float @v_exp2_f32_ninf(float %in) {
 }
 
 define float @v_exp2_f32_afn(float %in) {
-; GCN-LABEL: v_exp2_f32_afn:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_exp_f32_e32 v0, v0
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp2_f32_afn:
+; GCN-SDAG:       ; %bb.0:
+; GCN-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT:    s_mov_b32 s4, 0xc2fc0000
+; GCN-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
+; GCN-SDAG-NEXT:    v_mov_b32_e32 v2, 0x42800000
+; GCN-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; GCN-SDAG-NEXT:    v_add_f32_e32 v0, v0, v2
+; GCN-SDAG-NEXT:    v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT:    v_mov_b32_e32 v1, 0x1f800000
+; GCN-SDAG-NEXT:    v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN-GISEL-LABEL: v_exp2_f32_afn:
+; GCN-GISEL:       ; %bb.0:
+; GCN-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT:    v_mov_b32_e32 v1, 0xc2fc0000
+; GCN-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42800000
+; GCN-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
+; GCN-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v2, vcc
+; GCN-GISEL-NEXT:    v_add_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT:    v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT:    v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT:    v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp2_f32_afn:
 ; R600:       ; %bb.0:
@@ -1310,11 +1398,33 @@ define float @v_exp2_f32_afn_daz(float %in) #0 {
 }
 
 define float @v_exp2_f32_afn_dynamic(float %in) #1 {
-; GCN-LABEL: v_exp2_f32_afn_dynamic:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_exp_f32_e32 v0, v0
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp2_f32_afn_dynamic:
+; GCN-SDAG:       ; %bb.0:
+; GCN-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT:    s_mov_b32 s4, 0xc2fc0000
+; GCN-SDAG-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
+; GCN-SDAG-NEXT:    v_mov_b32_e32 v2, 0x42800000
+; GCN-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; GCN-SDAG-NEXT:    v_add_f32_e32 v0, v0, v2
+; GCN-SDAG-NEXT:    v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT:    v_mov_b32_e32 v1, 0x1f800000
+; GCN-SDAG-NEXT:    v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN-GISEL-LABEL: v_exp2_f32_afn_dynamic:
+; GCN-GISEL:       ; %bb.0:
+; GCN-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT:    v_mov_b32_e32 v1, 0xc2fc0000
+; GCN-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42800000
+; GCN-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
+; GCN-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v2, vcc
+; GCN-GISEL-NEXT:    v_add_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT:    v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT:    v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT:    v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp2_f32_afn_dynamic:
 ; R600:       ; %bb.0:
@@ -1330,11 +1440,33 @@ define float @v_exp2_f32_afn_dynamic(float %in) #1 {
 }
 
 define float @v_fabs_exp2_f32_afn(float %in) {
-; GCN-LABEL: v_fabs_exp2_f32_afn:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_exp_f32_e64 v0, |v0|
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_fabs_exp2_f32_afn:
+; GCN-SDAG:       ; %bb.0:
+; GCN-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT:    s_mov_b32 s4, 0xc2fc0000
+; GCN-SDAG-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
+; GCN-SDAG-NEXT:    v_mov_b32_e32 v2, 0x42800000
+; GCN-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; GCN-SDAG-NEXT:    v_add_f32_e64 v0, |v0|, v2
+; GCN-SDAG-NEXT:    v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT:    v_mov_b32_e32 v1, 0x1f800000
+; GCN-SDAG-NEXT:    v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GCN-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN-GISEL-LABEL: v_fabs_exp2_f32_afn:
+; GCN-GISEL:       ; %bb.0:
+; GCN-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT:    v_mov_b32_e32 v1, 0xc2fc0000
+; GCN-GISEL-NEXT:    v_mov_b32_e32 v2, 0x42800000
+; GCN-GISEL-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, v1
+; GCN-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v2, vcc
+; GCN-GISEL-NEXT:    v_add_f32_e64 v0, |v0|, v1
+; GCN-GISEL-NEXT:    v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT:    v_mov_b32_e32 v1, 0x1f800000
+; GCN-GISEL-NEXT:    v_cndmask_b32_e32 v1, 1.0, v1, vcc
+; GCN-GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GCN-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_fabs_exp2_f32_afn:
 ; R600:       ; %bb.0:


        


More information about the llvm-commits mailing list