[llvm] 1faa479 - AMDGPU: Handle unsafe exp.f32 with denormal handling
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 14 15:36:15 PDT 2023
Author: Matt Arsenault
Date: 2023-08-14T18:36:01-04:00
New Revision: 1faa4797ca7cfef0b38e8404ae316e4728c2e67a
URL: https://github.com/llvm/llvm-project/commit/1faa4797ca7cfef0b38e8404ae316e4728c2e67a
DIFF: https://github.com/llvm/llvm-project/commit/1faa4797ca7cfef0b38e8404ae316e4728c2e67a.diff
LOG: AMDGPU: Handle unsafe exp.f32 with denormal handling
I somehow missed this path when adding the new expansions. This saves a lot
of instructions for afn (approximate-function) exp.f32 when f32 denormals
must be handled (IEEE denormal mode).
https://reviews.llvm.org/D157867
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/test/CodeGen/AMDGPU/llvm.exp.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 2ed66b1d57c4de..ef8da1e35e924e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2760,14 +2760,40 @@ SDValue AMDGPUTargetLowering::lowerFEXP2(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::FMUL, SL, VT, Exp2, ResultScale, Flags);
}
-SDValue AMDGPUTargetLowering::lowerFEXPUnsafe(SDValue Op, const SDLoc &SL,
+SDValue AMDGPUTargetLowering::lowerFEXPUnsafe(SDValue X, const SDLoc &SL,
SelectionDAG &DAG,
SDNodeFlags Flags) const {
- // exp2(M_LOG2E_F * f);
- EVT VT = Op.getValueType();
- const SDValue K = DAG.getConstantFP(numbers::log2e, SL, VT);
- SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Op, K, Flags);
- return DAG.getNode(VT == MVT::f32 ? AMDGPUISD::EXP : ISD::FEXP2, SL, VT, Mul,
+ EVT VT = X.getValueType();
+ const SDValue Log2E = DAG.getConstantFP(numbers::log2e, SL, VT);
+
+ if (VT != MVT::f32 || !needsDenormHandlingF32(DAG, X, Flags)) {
+ // exp2(M_LOG2E_F * f);
+ SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, X, Log2E, Flags);
+ return DAG.getNode(VT == MVT::f32 ? AMDGPUISD::EXP : ISD::FEXP2, SL, VT,
+ Mul, Flags);
+ }
+
+ EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+
+ SDValue Threshold = DAG.getConstantFP(-0x1.5d58a0p+6f, SL, VT);
+ SDValue NeedsScaling = DAG.getSetCC(SL, SetCCVT, X, Threshold, ISD::SETOLT);
+
+ SDValue ScaleOffset = DAG.getConstantFP(0x1.0p+6f, SL, VT);
+
+ SDValue ScaledX = DAG.getNode(ISD::FADD, SL, VT, X, ScaleOffset, Flags);
+
+ SDValue AdjustedX =
+ DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, ScaledX, X);
+
+ SDValue ExpInput = DAG.getNode(ISD::FMUL, SL, VT, AdjustedX, Log2E, Flags);
+
+ SDValue Exp2 = DAG.getNode(AMDGPUISD::EXP, SL, VT, ExpInput, Flags);
+
+ SDValue ResultScaleFactor = DAG.getConstantFP(0x1.969d48p-93f, SL, VT);
+ SDValue AdjustedResult =
+ DAG.getNode(ISD::FMUL, SL, VT, Exp2, ResultScaleFactor, Flags);
+
+ return DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, AdjustedResult, Exp2,
Flags);
}
@@ -2800,7 +2826,7 @@ SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const {
// TODO: Interpret allowApproxFunc as ignoring DAZ. This is currently copying
// library behavior. Also, is known-not-daz source sufficient?
- if (allowApproxFunc(DAG, Flags) && !needsDenormHandlingF32(DAG, X, Flags)) {
+ if (allowApproxFunc(DAG, Flags)) {
assert(!IsExp10 && "todo exp10 support");
return lowerFEXPUnsafe(X, SL, DAG, Flags);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 33be59a69b52e2..d7e2d15f98a385 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -3304,20 +3304,42 @@ bool AMDGPULegalizerInfo::legalizeFExp2(MachineInstr &MI,
}
bool AMDGPULegalizerInfo::legalizeFExpUnsafe(MachineIRBuilder &B, Register Dst,
- Register Src,
- unsigned Flags) const {
+ Register X, unsigned Flags) const {
LLT Ty = B.getMRI()->getType(Dst);
- auto K = B.buildFConstant(Ty, numbers::log2e);
- auto Mul = B.buildFMul(Ty, Src, K, Flags);
+ LLT F32 = LLT::scalar(32);
- if (Ty == LLT::scalar(32)) {
- B.buildIntrinsic(Intrinsic::amdgcn_exp2, ArrayRef<Register>{Dst})
+ if (Ty != F32 || !needsDenormHandlingF32(B.getMF(), X, Flags)) {
+ auto Log2E = B.buildFConstant(Ty, numbers::log2e);
+ auto Mul = B.buildFMul(Ty, X, Log2E, Flags);
+
+ if (Ty == F32) {
+ B.buildIntrinsic(Intrinsic::amdgcn_exp2, ArrayRef<Register>{Dst})
.addUse(Mul.getReg(0))
.setMIFlags(Flags);
- } else {
- B.buildFExp2(Dst, Mul.getReg(0), Flags);
+ } else {
+ B.buildFExp2(Dst, Mul.getReg(0), Flags);
+ }
+
+ return true;
}
+ auto Threshold = B.buildFConstant(Ty, -0x1.5d58a0p+6f);
+ auto NeedsScaling =
+ B.buildFCmp(CmpInst::FCMP_OLT, LLT::scalar(1), X, Threshold, Flags);
+ auto ScaleOffset = B.buildFConstant(Ty, 0x1.0p+6f);
+ auto ScaledX = B.buildFAdd(Ty, X, ScaleOffset, Flags);
+ auto AdjustedX = B.buildSelect(Ty, NeedsScaling, ScaledX, X, Flags);
+
+ auto Log2E = B.buildFConstant(Ty, numbers::log2e);
+ auto ExpInput = B.buildFMul(Ty, AdjustedX, Log2E, Flags);
+
+ auto Exp2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {Ty})
+ .addUse(ExpInput.getReg(0))
+ .setMIFlags(Flags);
+
+ auto ResultScaleFactor = B.buildFConstant(Ty, 0x1.969d48p-93f);
+ auto AdjustedResult = B.buildFMul(Ty, Exp2, ResultScaleFactor, Flags);
+ B.buildSelect(Dst, NeedsScaling, AdjustedResult, Exp2, Flags);
return true;
}
@@ -3358,7 +3380,7 @@ bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI,
// TODO: Interpret allowApproxFunc as ignoring DAZ. This is currently copying
// library behavior. Also, is known-not-daz source sufficient?
- if (allowApproxFunc(MF, Flags) && !needsDenormHandlingF32(MF, X, Flags)) {
+ if (allowApproxFunc(MF, Flags)) {
legalizeFExpUnsafe(B, Dst, X, Flags);
MI.eraseFromParent();
return true;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
index 36d5326b0a3eba..16ee31a875b395 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
@@ -3212,124 +3212,56 @@ define float @v_exp_fneg_f32(float %in) {
}
define float @v_exp_f32_fast(float %in) {
-; VI-SDAG-LABEL: v_exp_f32_fast:
-; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
-; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
-; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
-; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
-; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
-; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
-; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
-; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
-; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
-; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
-; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
-; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
-; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_exp_f32_fast:
-; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
-; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
-; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
-; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
-; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
-; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-SDAG-LABEL: v_exp_f32_fast:
-; GFX900-SDAG: ; %bb.0:
-; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
-; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
-; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
-; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
-; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp_f32_fast:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
+; GCN-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
+; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-GISEL-LABEL: v_exp_f32_fast:
-; GFX900-GISEL: ; %bb.0:
-; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
-; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
-; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GCN-GISEL-LABEL: v_exp_f32_fast:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
+; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_fast:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
-; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
-; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
-; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
-; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
-; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
+; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_f32_fast:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
-; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
-; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
-; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
-; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
+; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_fast:
@@ -3346,148 +3278,56 @@ define float @v_exp_f32_fast(float %in) {
}
define float @v_exp_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
-; VI-SDAG-LABEL: v_exp_f32_unsafe_math_attr:
-; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
-; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
-; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
-; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
-; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
-; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
-; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
-; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
-; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
-; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
-; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
-; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
-; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_exp_f32_unsafe_math_attr:
-; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
-; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
-; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
-; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
-; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
-; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
-; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
-; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-SDAG-LABEL: v_exp_f32_unsafe_math_attr:
-; GFX900-SDAG: ; %bb.0:
-; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
-; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
-; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1
-; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
-; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1
-; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
-; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
-; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
-; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp_f32_unsafe_math_attr:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
+; GCN-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
+; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-GISEL-LABEL: v_exp_f32_unsafe_math_attr:
-; GFX900-GISEL: ; %bb.0:
-; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
-; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1
-; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v1
-; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
-; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
-; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
-; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GCN-GISEL-LABEL: v_exp_f32_unsafe_math_attr:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
+; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_unsafe_math_attr:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
-; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
-; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
-; SI-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1
-; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
-; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
-; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
-; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
-; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
+; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_f32_unsafe_math_attr:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
-; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v1
-; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
-; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
-; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
+; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_unsafe_math_attr:
@@ -3504,148 +3344,56 @@ define float @v_exp_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
}
define float @v_exp_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" {
-; VI-SDAG-LABEL: v_exp_f32_approx_fn_attr:
-; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
-; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
-; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
-; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
-; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
-; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
-; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
-; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
-; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
-; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
-; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
-; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
-; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_exp_f32_approx_fn_attr:
-; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
-; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
-; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
-; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
-; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
-; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
-; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
-; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-SDAG-LABEL: v_exp_f32_approx_fn_attr:
-; GFX900-SDAG: ; %bb.0:
-; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
-; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
-; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
-; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
-; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GCN-SDAG-LABEL: v_exp_f32_approx_fn_attr:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
+; GCN-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
+; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-GISEL-LABEL: v_exp_f32_approx_fn_attr:
-; GFX900-GISEL: ; %bb.0:
-; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
-; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
-; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
-; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
-; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GCN-GISEL-LABEL: v_exp_f32_approx_fn_attr:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
+; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_approx_fn_attr:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
-; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
-; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
-; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
-; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
-; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
-; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
+; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_f32_approx_fn_attr:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
-; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
-; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
-; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
-; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
-; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
+; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_exp_f32_approx_fn_attr:
@@ -3657,495 +3405,16 @@ define float @v_exp_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" {
; CM: ; %bb.0:
; CM-NEXT: CF_END
; CM-NEXT: PAD
- %result = call float @llvm.exp.f32(float %in)
- ret float %result
-}
-
-define float @v_exp_f32_ninf(float %in) {
-; VI-SDAG-LABEL: v_exp_f32_ninf:
-; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
-; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
-; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
-; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
-; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
-; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
-; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
-; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
-; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
-; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
-; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
-; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
-; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_exp_f32_ninf:
-; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
-; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
-; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
-; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
-; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
-; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-SDAG-LABEL: v_exp_f32_ninf:
-; GFX900-SDAG: ; %bb.0:
-; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
-; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
-; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
-; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
-; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-GISEL-LABEL: v_exp_f32_ninf:
-; GFX900-GISEL: ; %bb.0:
-; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
-; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
-; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; SI-SDAG-LABEL: v_exp_f32_ninf:
-; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
-; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
-; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
-; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
-; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
-; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
-; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_exp_f32_ninf:
-; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
-; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
-; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
-; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
-; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; R600-LABEL: v_exp_f32_ninf:
-; R600: ; %bb.0:
-; R600-NEXT: CF_END
-; R600-NEXT: PAD
-;
-; CM-LABEL: v_exp_f32_ninf:
-; CM: ; %bb.0:
-; CM-NEXT: CF_END
-; CM-NEXT: PAD
- %result = call ninf float @llvm.exp.f32(float %in)
- ret float %result
-}
-
-define float @v_exp_f32_afn(float %in) {
-; VI-SDAG-LABEL: v_exp_f32_afn:
-; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
-; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
-; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
-; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
-; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
-; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
-; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
-; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
-; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
-; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
-; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
-; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
-; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_exp_f32_afn:
-; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
-; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
-; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
-; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
-; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
-; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
-; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
-; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-SDAG-LABEL: v_exp_f32_afn:
-; GFX900-SDAG: ; %bb.0:
-; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
-; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
-; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
-; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
-; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-GISEL-LABEL: v_exp_f32_afn:
-; GFX900-GISEL: ; %bb.0:
-; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
-; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
-; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
-; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
-; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; SI-SDAG-LABEL: v_exp_f32_afn:
-; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
-; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
-; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
-; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
-; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
-; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
-; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_exp_f32_afn:
-; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
-; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
-; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
-; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
-; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
-; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
-; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; R600-LABEL: v_exp_f32_afn:
-; R600: ; %bb.0:
-; R600-NEXT: CF_END
-; R600-NEXT: PAD
-;
-; CM-LABEL: v_exp_f32_afn:
-; CM: ; %bb.0:
-; CM-NEXT: CF_END
-; CM-NEXT: PAD
- %result = call afn float @llvm.exp.f32(float %in)
- ret float %result
-}
-
-define float @v_exp_f32_afn_daz(float %in) #0 {
-; GCN-LABEL: v_exp_f32_afn_daz:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; GCN-NEXT: v_exp_f32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
-;
-; SI-LABEL: v_exp_f32_afn_daz:
-; SI: ; %bb.0:
-; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
-; SI-NEXT: v_exp_f32_e32 v0, v0
-; SI-NEXT: s_setpc_b64 s[30:31]
-;
-; R600-LABEL: v_exp_f32_afn_daz:
-; R600: ; %bb.0:
-; R600-NEXT: CF_END
-; R600-NEXT: PAD
-;
-; CM-LABEL: v_exp_f32_afn_daz:
-; CM: ; %bb.0:
-; CM-NEXT: CF_END
-; CM-NEXT: PAD
- %result = call afn float @llvm.exp.f32(float %in)
- ret float %result
-}
-
-define float @v_exp_f32_afn_dynamic(float %in) #1 {
-; VI-SDAG-LABEL: v_exp_f32_afn_dynamic:
-; VI-SDAG: ; %bb.0:
-; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
-; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
-; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
-; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
-; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
-; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
-; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
-; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
-; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
-; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
-; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
-; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
-; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-GISEL-LABEL: v_exp_f32_afn_dynamic:
-; VI-GISEL: ; %bb.0:
-; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
-; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
-; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
-; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
-; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
-; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
-; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
-; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-SDAG-LABEL: v_exp_f32_afn_dynamic:
-; GFX900-SDAG: ; %bb.0:
-; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
-; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
-; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
-; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
-; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-GISEL-LABEL: v_exp_f32_afn_dynamic:
-; GFX900-GISEL: ; %bb.0:
-; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
-; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
-; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
-; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
-; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; SI-SDAG-LABEL: v_exp_f32_afn_dynamic:
-; SI-SDAG: ; %bb.0:
-; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
-; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
-; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
-; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
-; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
-; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
-; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
-; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; SI-GISEL-LABEL: v_exp_f32_afn_dynamic:
-; SI-GISEL: ; %bb.0:
-; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
-; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
-; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
-; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
-; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
-; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
-; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; R600-LABEL: v_exp_f32_afn_dynamic:
-; R600: ; %bb.0:
-; R600-NEXT: CF_END
-; R600-NEXT: PAD
-;
-; CM-LABEL: v_exp_f32_afn_dynamic:
-; CM: ; %bb.0:
-; CM-NEXT: CF_END
-; CM-NEXT: PAD
- %result = call afn float @llvm.exp.f32(float %in)
+ %result = call float @llvm.exp.f32(float %in)
ret float %result
}
-define float @v_fabs_exp_f32_afn(float %in) {
-; VI-SDAG-LABEL: v_fabs_exp_f32_afn:
+define float @v_exp_f32_ninf(float %in) {
+; VI-SDAG-LABEL: v_exp_f32_ninf:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SDAG-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0
-; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v1
-; VI-SDAG-NEXT: v_sub_f32_e64 v4, |v0|, v1
+; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
@@ -4158,21 +3427,16 @@ define float @v_fabs_exp_f32_afn(float %in) {
; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
-; VI-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4
-; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; VI-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; VI-GISEL-LABEL: v_fabs_exp_f32_afn:
+; VI-GISEL-LABEL: v_exp_f32_ninf:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0
-; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v1
-; VI-GISEL-NEXT: v_sub_f32_e64 v2, |v0|, v1
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
@@ -4184,106 +3448,316 @@ define float @v_fabs_exp_f32_afn(float %in) {
; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
-; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-SDAG-LABEL: v_fabs_exp_f32_afn:
+; GFX900-SDAG-LABEL: v_exp_f32_ninf:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; GFX900-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4
; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
-; GFX900-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, -v1
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
-; GFX900-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, v1
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
-; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX900-GISEL-LABEL: v_fabs_exp_f32_afn:
+; GFX900-GISEL-LABEL: v_exp_f32_ninf:
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; GFX900-GISEL-NEXT: v_mul_f32_e64 v1, |v0|, s4
-; GFX900-GISEL-NEXT: v_fma_f32 v2, |v0|, s4, -v1
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
-; GFX900-GISEL-NEXT: v_fma_f32 v2, |v0|, v3, v2
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; SI-SDAG-LABEL: v_fabs_exp_f32_afn:
+; SI-SDAG-LABEL: v_exp_f32_ninf:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; SI-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4
; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
-; SI-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, -v1
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
-; SI-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, v1
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
-; SI-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4
-; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
+; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; SI-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; SI-GISEL-LABEL: v_fabs_exp_f32_afn:
+; SI-GISEL-LABEL: v_exp_f32_ninf:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; SI-GISEL-NEXT: v_mul_f32_e64 v1, |v0|, s4
-; SI-GISEL-NEXT: v_fma_f32 v2, |v0|, s4, -v1
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
-; SI-GISEL-NEXT: v_fma_f32 v2, |v0|, v3, v2
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
-; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; R600-LABEL: v_exp_f32_ninf:
+; R600: ; %bb.0:
+; R600-NEXT: CF_END
+; R600-NEXT: PAD
+;
+; CM-LABEL: v_exp_f32_ninf:
+; CM: ; %bb.0:
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+ %result = call ninf float @llvm.exp.f32(float %in)
+ ret float %result
+}
+
+define float @v_exp_f32_afn(float %in) {
+; GCN-SDAG-LABEL: v_exp_f32_afn:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
+; GCN-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
+; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN-GISEL-LABEL: v_exp_f32_afn:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
+; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-LABEL: v_exp_f32_afn:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
+; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_exp_f32_afn:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
+; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; R600-LABEL: v_exp_f32_afn:
+; R600: ; %bb.0:
+; R600-NEXT: CF_END
+; R600-NEXT: PAD
+;
+; CM-LABEL: v_exp_f32_afn:
+; CM: ; %bb.0:
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+ %result = call afn float @llvm.exp.f32(float %in)
+ ret float %result
+}
+
+define float @v_exp_f32_afn_daz(float %in) #0 {
+; GCN-LABEL: v_exp_f32_afn_daz:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-NEXT: v_exp_f32_e32 v0, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-LABEL: v_exp_f32_afn_daz:
+; SI: ; %bb.0:
+; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-NEXT: v_exp_f32_e32 v0, v0
+; SI-NEXT: s_setpc_b64 s[30:31]
+;
+; R600-LABEL: v_exp_f32_afn_daz:
+; R600: ; %bb.0:
+; R600-NEXT: CF_END
+; R600-NEXT: PAD
+;
+; CM-LABEL: v_exp_f32_afn_daz:
+; CM: ; %bb.0:
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+ %result = call afn float @llvm.exp.f32(float %in)
+ ret float %result
+}
+
+define float @v_exp_f32_afn_dynamic(float %in) #1 {
+; GCN-SDAG-LABEL: v_exp_f32_afn_dynamic:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
+; GCN-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
+; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN-GISEL-LABEL: v_exp_f32_afn_dynamic:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
+; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-LABEL: v_exp_f32_afn_dynamic:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
+; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_exp_f32_afn_dynamic:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
+; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; R600-LABEL: v_exp_f32_afn_dynamic:
+; R600: ; %bb.0:
+; R600-NEXT: CF_END
+; R600-NEXT: PAD
+;
+; CM-LABEL: v_exp_f32_afn_dynamic:
+; CM: ; %bb.0:
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+ %result = call afn float @llvm.exp.f32(float %in)
+ ret float %result
+}
+
+define float @v_fabs_exp_f32_afn(float %in) {
+; GCN-SDAG-LABEL: v_fabs_exp_f32_afn:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
+; GCN-SDAG-NEXT: s_mov_b32 s5, 0x42800000
+; GCN-SDAG-NEXT: v_add_f32_e64 v1, |v0|, s5
+; GCN-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
+; GCN-SDAG-NEXT: v_cndmask_b32_e64 v0, |v0|, v1, vcc
+; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GCN-GISEL-LABEL: v_fabs_exp_f32_afn:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
+; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; GCN-GISEL-NEXT: v_add_f32_e64 v2, |v0|, v2
+; GCN-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; GCN-GISEL-NEXT: v_cndmask_b32_e64 v0, |v0|, v2, vcc
+; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-SDAG-LABEL: v_fabs_exp_f32_afn:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
+; SI-SDAG-NEXT: s_mov_b32 s5, 0x42800000
+; SI-SDAG-NEXT: v_add_f32_e64 v1, |v0|, s5
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, |v0|, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; SI-GISEL-LABEL: v_fabs_exp_f32_afn:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
+; SI-GISEL-NEXT: v_add_f32_e64 v2, |v0|, v2
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, |v0|, v2, vcc
+; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_fabs_exp_f32_afn:
@@ -6465,21 +5939,15 @@ define float @v_exp_f32_from_fpext_math_f16_fast(i16 %src0.i, i16 %src1.i) {
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50
; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
-; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
-; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
-; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v1
-; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
-; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
-; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
-; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
-; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
-; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
+; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_exp_f32_from_fpext_math_f16_fast:
More information about the llvm-commits
mailing list