[llvm-branch-commits] [llvm] 216fdc8 - AMDGPU: Fix fast f32 log/log10
Tobias Hieta via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Aug 21 01:08:18 PDT 2023
Author: Matt Arsenault
Date: 2023-08-21T09:54:24+02:00
New Revision: 216fdc890d0ed5c516ff1487f3a1497e75b5a156
URL: https://github.com/llvm/llvm-project/commit/216fdc890d0ed5c516ff1487f3a1497e75b5a156
DIFF: https://github.com/llvm/llvm-project/commit/216fdc890d0ed5c516ff1487f3a1497e75b5a156.diff
LOG: AMDGPU: Fix fast f32 log/log10
OpenCL conformance didn't like interpreting afn as permission to ignore
denormal handling.
https://reviews.llvm.org/D157940
(cherry picked from commit 4b7b4b945856c46b91ade7b6c4264fe8e258367e)
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
llvm/test/CodeGen/AMDGPU/llvm.log.ll
llvm/test/CodeGen/AMDGPU/llvm.log10.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index fc82fb620142f3..69ea1501d147d6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2609,9 +2609,7 @@ SDValue AMDGPUTargetLowering::LowerFLOGCommon(SDValue Op,
X = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, X, Flags);
}
- SDValue Lowered = LowerFLOGUnsafe(
- X, DL, DAG, IsLog10 ? numbers::ln2 / numbers::ln10 : numbers::ln2,
- Flags);
+ SDValue Lowered = LowerFLOGUnsafe(X, DL, DAG, IsLog10, Flags);
if (VT == MVT::f16 && !Subtarget->has16BitInsts()) {
return DAG.getNode(ISD::FP_ROUND, DL, VT, Lowered,
DAG.getTargetConstant(0, DL, MVT::i32), Flags);
@@ -2696,11 +2694,36 @@ SDValue AMDGPUTargetLowering::LowerFLOG10(SDValue Op, SelectionDAG &DAG) const {
// Do f32 fast math expansion for flog2 or flog10. This is accurate enough for a
// promote f16 operation.
SDValue AMDGPUTargetLowering::LowerFLOGUnsafe(SDValue Src, const SDLoc &SL,
- SelectionDAG &DAG,
- double Log2BaseInverted,
+ SelectionDAG &DAG, bool IsLog10,
SDNodeFlags Flags) const {
EVT VT = Src.getValueType();
unsigned LogOp = VT == MVT::f32 ? AMDGPUISD::LOG : ISD::FLOG2;
+
+ double Log2BaseInverted =
+ IsLog10 ? numbers::ln2 / numbers::ln10 : numbers::ln2;
+
+ if (VT == MVT::f32) {
+ auto [ScaledInput, IsScaled] = getScaledLogInput(DAG, SL, Src, Flags);
+ if (ScaledInput) {
+ SDValue LogSrc = DAG.getNode(AMDGPUISD::LOG, SL, VT, ScaledInput, Flags);
+ SDValue ScaledResultOffset =
+ DAG.getConstantFP(-32.0 * Log2BaseInverted, SL, VT);
+
+ SDValue Zero = DAG.getConstantFP(0.0f, SL, VT);
+
+ SDValue ResultOffset = DAG.getNode(ISD::SELECT, SL, VT, IsScaled,
+ ScaledResultOffset, Zero, Flags);
+
+ SDValue Log2Inv = DAG.getConstantFP(Log2BaseInverted, SL, VT);
+
+ if (Subtarget->hasFastFMAF32())
+ return DAG.getNode(ISD::FMA, SL, VT, LogSrc, Log2Inv, ResultOffset,
+ Flags);
+ SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, LogSrc, Log2Inv, Flags);
+ return DAG.getNode(ISD::FADD, SL, VT, Mul, ResultOffset);
+ }
+ }
+
SDValue Log2Operand = DAG.getNode(LogOp, SL, VT, Src, Flags);
SDValue Log2BaseInvertedOperand = DAG.getConstantFP(Log2BaseInverted, SL, VT);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 26b91155ba85df..c39093b9bb6bb2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -72,7 +72,7 @@ class AMDGPUTargetLowering : public TargetLowering {
SDValue LowerFLOGCommon(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLOG10(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLOGUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG,
- double Log2BaseInverted, SDNodeFlags Flags) const;
+ bool IsLog10, SDNodeFlags Flags) const;
SDValue lowerFEXP2(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFEXPUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 9325b14e7cc5f2..100180a2ab4424 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -3131,16 +3131,13 @@ bool AMDGPULegalizerInfo::legalizeFlogCommon(MachineInstr &MI,
if (Ty == F16 || MI.getFlag(MachineInstr::FmAfn) ||
TM.Options.ApproxFuncFPMath || TM.Options.UnsafeFPMath) {
- const double Log2BaseInv =
- IsLog10 ? numbers::ln2 / numbers::ln10 : numbers::ln2;
-
if (Ty == F16 && !ST.has16BitInsts()) {
Register LogVal = MRI.createGenericVirtualRegister(F32);
auto PromoteSrc = B.buildFPExt(F32, X);
- legalizeFlogUnsafe(B, LogVal, PromoteSrc.getReg(0), Log2BaseInv, Flags);
+ legalizeFlogUnsafe(B, LogVal, PromoteSrc.getReg(0), IsLog10, Flags);
B.buildFPTrunc(Dst, LogVal);
} else {
- legalizeFlogUnsafe(B, Dst, X, Log2BaseInv, Flags);
+ legalizeFlogUnsafe(B, Dst, X, IsLog10, Flags);
}
MI.eraseFromParent();
@@ -3224,10 +3221,36 @@ bool AMDGPULegalizerInfo::legalizeFlogCommon(MachineInstr &MI,
}
bool AMDGPULegalizerInfo::legalizeFlogUnsafe(MachineIRBuilder &B, Register Dst,
- Register Src,
- double Log2BaseInverted,
+ Register Src, bool IsLog10,
unsigned Flags) const {
+ const double Log2BaseInverted =
+ IsLog10 ? numbers::ln2 / numbers::ln10 : numbers::ln2;
+
LLT Ty = B.getMRI()->getType(Dst);
+
+ if (Ty == LLT::scalar(32)) {
+ auto [ScaledInput, IsScaled] = getScaledLogInput(B, Src, Flags);
+ if (ScaledInput) {
+ auto LogSrc = B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}, false)
+ .addUse(Src)
+ .setMIFlags(Flags);
+ auto ScaledResultOffset = B.buildFConstant(Ty, -32.0 * Log2BaseInverted);
+ auto Zero = B.buildFConstant(Ty, 0.0);
+ auto ResultOffset =
+ B.buildSelect(Ty, IsScaled, ScaledResultOffset, Zero, Flags);
+ auto Log2Inv = B.buildFConstant(Ty, Log2BaseInverted);
+
+ if (ST.hasFastFMAF32())
+ B.buildFMA(Dst, LogSrc, Log2Inv, ResultOffset, Flags);
+ else {
+ auto Mul = B.buildFMul(Ty, LogSrc, Log2Inv, Flags);
+ B.buildFAdd(Dst, Mul, ResultOffset, Flags);
+ }
+
+ return true;
+ }
+ }
+
auto Log2Operand = Ty == LLT::scalar(16)
? B.buildFLog2(Ty, Src, Flags)
: B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}, false)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 04773f275c8756..534bb2c87ea384 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -85,7 +85,7 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
bool legalizeFlog2(MachineInstr &MI, MachineIRBuilder &B) const;
bool legalizeFlogCommon(MachineInstr &MI, MachineIRBuilder &B) const;
bool legalizeFlogUnsafe(MachineIRBuilder &B, Register Dst, Register Src,
- double Log2BaseInverted, unsigned Flags) const;
+ bool IsLog10, unsigned Flags) const;
bool legalizeFExp2(MachineInstr &MI, MachineIRBuilder &B) const;
bool legalizeFExpUnsafe(MachineIRBuilder &B, Register Dst, Register Src,
unsigned Flags) const;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log.ll b/llvm/test/CodeGen/AMDGPU/llvm.log.ll
index 41d5f7f2303e8e..23aab3fa863d9d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log.ll
@@ -2924,20 +2924,111 @@ define float @v_log_fneg_f32(float %in) {
}
define float @v_log_f32_fast(float %in) {
-; GFX689-LABEL: v_log_f32_fast:
-; GFX689: ; %bb.0:
-; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT: v_log_f32_e32 v0, v0
-; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log_f32_fast:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317218
+; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: v_log_f32_fast:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log_f32_fast:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_log_f32_e32 v2, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218
+; SI-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log_f32_fast:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log_f32_fast:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_log_f32_e32 v2, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log_f32_fast:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317218
+; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log_f32_fast:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218
+; GFX900-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log_f32_fast:
+; GFX1100-SDAG: ; %bb.0:
+; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc1b17218, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3f317218, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log_f32_fast:
+; GFX1100-GISEL: ; %bb.0:
+; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc1b17218, vcc_lo
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3f317218, v1
+; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log_f32_fast:
; R600: ; %bb.0:
@@ -2953,20 +3044,111 @@ define float @v_log_f32_fast(float %in) {
}
define float @v_log_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
-; GFX689-LABEL: v_log_f32_unsafe_math_attr:
-; GFX689: ; %bb.0:
-; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT: v_log_f32_e32 v0, v0
-; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log_f32_unsafe_math_attr:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317218
+; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: v_log_f32_unsafe_math_attr:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log_f32_unsafe_math_attr:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_log_f32_e32 v2, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218
+; SI-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log_f32_unsafe_math_attr:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log_f32_unsafe_math_attr:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_log_f32_e32 v2, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log_f32_unsafe_math_attr:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317218
+; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log_f32_unsafe_math_attr:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218
+; GFX900-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log_f32_unsafe_math_attr:
+; GFX1100-SDAG: ; %bb.0:
+; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc1b17218, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3f317218, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log_f32_unsafe_math_attr:
+; GFX1100-GISEL: ; %bb.0:
+; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc1b17218, vcc_lo
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3f317218, v1
+; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log_f32_unsafe_math_attr:
; R600: ; %bb.0:
@@ -2982,20 +3164,111 @@ define float @v_log_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
}
define float @v_log_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" {
-; GFX689-LABEL: v_log_f32_approx_fn_attr:
-; GFX689: ; %bb.0:
-; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT: v_log_f32_e32 v0, v0
-; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log_f32_approx_fn_attr:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317218
+; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: v_log_f32_approx_fn_attr:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log_f32_approx_fn_attr:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_log_f32_e32 v2, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218
+; SI-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log_f32_approx_fn_attr:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log_f32_approx_fn_attr:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_log_f32_e32 v2, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log_f32_approx_fn_attr:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317218
+; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log_f32_approx_fn_attr:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218
+; GFX900-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log_f32_approx_fn_attr:
+; GFX1100-SDAG: ; %bb.0:
+; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc1b17218, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3f317218, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log_f32_approx_fn_attr:
+; GFX1100-GISEL: ; %bb.0:
+; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc1b17218, vcc_lo
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3f317218, v1
+; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log_f32_approx_fn_attr:
; R600: ; %bb.0:
@@ -3213,20 +3486,111 @@ define float @v_log_f32_ninf(float %in) {
}
define float @v_log_f32_afn(float %in) {
-; GFX689-LABEL: v_log_f32_afn:
-; GFX689: ; %bb.0:
-; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT: v_log_f32_e32 v0, v0
-; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log_f32_afn:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317218
+; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: v_log_f32_afn:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log_f32_afn:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_log_f32_e32 v2, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218
+; SI-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log_f32_afn:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log_f32_afn:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_log_f32_e32 v2, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log_f32_afn:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317218
+; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log_f32_afn:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218
+; GFX900-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log_f32_afn:
+; GFX1100-SDAG: ; %bb.0:
+; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc1b17218, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3f317218, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log_f32_afn:
+; GFX1100-GISEL: ; %bb.0:
+; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc1b17218, vcc_lo
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3f317218, v1
+; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log_f32_afn:
; R600: ; %bb.0:
@@ -3271,20 +3635,111 @@ define float @v_log_f32_afn_daz(float %in) #0 {
}
define float @v_log_f32_afn_dynamic(float %in) #1 {
-; GFX689-LABEL: v_log_f32_afn_dynamic:
-; GFX689: ; %bb.0:
-; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT: v_log_f32_e32 v0, v0
-; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log_f32_afn_dynamic:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317218
+; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: v_log_f32_afn_dynamic:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log_f32_afn_dynamic:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_log_f32_e32 v2, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218
+; SI-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log_f32_afn_dynamic:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log_f32_afn_dynamic:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_log_f32_e32 v2, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log_f32_afn_dynamic:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317218
+; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log_f32_afn_dynamic:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218
+; GFX900-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log_f32_afn_dynamic:
+; GFX1100-SDAG: ; %bb.0:
+; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc1b17218, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3f317218, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log_f32_afn_dynamic:
+; GFX1100-GISEL: ; %bb.0:
+; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc1b17218, vcc_lo
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3f317218, v1
+; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log_f32_afn_dynamic:
; R600: ; %bb.0:
@@ -3300,20 +3755,112 @@ define float @v_log_f32_afn_dynamic(float %in) #1 {
}
define float @v_fabs_log_f32_afn(float %in) {
-; GFX689-LABEL: v_fabs_log_f32_afn:
-; GFX689: ; %bb.0:
-; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT: v_log_f32_e64 v0, |v0|
-; GFX689-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX689-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_fabs_log_f32_afn:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; SI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317218
+; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: v_fabs_log_f32_afn:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_log_f32_e64 v0, |v0|
-; GFX1100-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_fabs_log_f32_afn:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_log_f32_e64 v2, |v0|
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218
+; SI-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_fabs_log_f32_afn:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; VI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317218, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_fabs_log_f32_afn:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_log_f32_e64 v2, |v0|
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317218, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_fabs_log_f32_afn:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317218
+; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_fabs_log_f32_afn:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_log_f32_e64 v2, |v0|
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1b17218
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317218
+; GFX900-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_fabs_log_f32_afn:
+; GFX1100-SDAG: ; %bb.0:
+; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc1b17218, s0
+; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3f317218, v0
+; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_fabs_log_f32_afn:
+; GFX1100-GISEL: ; %bb.0:
+; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT: v_log_f32_e64 v1, |v0|
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc1b17218, s0
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3f317218, v1
+; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_fabs_log_f32_afn:
; R600: ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
index 1aebcc61aa2681..9df2cec4441336 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
@@ -2924,20 +2924,111 @@ define float @v_log10_fneg_f32(float %in) {
}
define float @v_log10_f32_fast(float %in) {
-; GFX689-LABEL: v_log10_f32_fast:
-; GFX689: ; %bb.0:
-; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT: v_log_f32_e32 v0, v0
-; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log10_f32_fast:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209b
+; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: v_log10_f32_fast:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log10_f32_fast:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_log_f32_e32 v2, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209b
+; SI-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log10_f32_fast:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log10_f32_fast:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_log_f32_e32 v2, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log10_f32_fast:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209b
+; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log10_f32_fast:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209b
+; GFX900-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log10_f32_fast:
+; GFX1100-SDAG: ; %bb.0:
+; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc11a209b, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3e9a209b, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log10_f32_fast:
+; GFX1100-GISEL: ; %bb.0:
+; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc11a209b, vcc_lo
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3e9a209b, v1
+; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log10_f32_fast:
; R600: ; %bb.0:
@@ -2953,20 +3044,111 @@ define float @v_log10_f32_fast(float %in) {
}
define float @v_log10_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
-; GFX689-LABEL: v_log10_f32_unsafe_math_attr:
-; GFX689: ; %bb.0:
-; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT: v_log_f32_e32 v0, v0
-; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log10_f32_unsafe_math_attr:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209b
+; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: v_log10_f32_unsafe_math_attr:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log10_f32_unsafe_math_attr:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_log_f32_e32 v2, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209b
+; SI-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log10_f32_unsafe_math_attr:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log10_f32_unsafe_math_attr:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_log_f32_e32 v2, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log10_f32_unsafe_math_attr:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209b
+; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log10_f32_unsafe_math_attr:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209b
+; GFX900-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log10_f32_unsafe_math_attr:
+; GFX1100-SDAG: ; %bb.0:
+; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc11a209b, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3e9a209b, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log10_f32_unsafe_math_attr:
+; GFX1100-GISEL: ; %bb.0:
+; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc11a209b, vcc_lo
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3e9a209b, v1
+; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log10_f32_unsafe_math_attr:
; R600: ; %bb.0:
@@ -2982,20 +3164,111 @@ define float @v_log10_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
}
define float @v_log10_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" {
-; GFX689-LABEL: v_log10_f32_approx_fn_attr:
-; GFX689: ; %bb.0:
-; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT: v_log_f32_e32 v0, v0
-; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log10_f32_approx_fn_attr:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209b
+; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: v_log10_f32_approx_fn_attr:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log10_f32_approx_fn_attr:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_log_f32_e32 v2, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209b
+; SI-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log10_f32_approx_fn_attr:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log10_f32_approx_fn_attr:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_log_f32_e32 v2, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log10_f32_approx_fn_attr:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209b
+; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log10_f32_approx_fn_attr:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209b
+; GFX900-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log10_f32_approx_fn_attr:
+; GFX1100-SDAG: ; %bb.0:
+; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc11a209b, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3e9a209b, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log10_f32_approx_fn_attr:
+; GFX1100-GISEL: ; %bb.0:
+; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc11a209b, vcc_lo
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3e9a209b, v1
+; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log10_f32_approx_fn_attr:
; R600: ; %bb.0:
@@ -3213,20 +3486,111 @@ define float @v_log10_f32_ninf(float %in) {
}
define float @v_log10_f32_afn(float %in) {
-; GFX689-LABEL: v_log10_f32_afn:
-; GFX689: ; %bb.0:
-; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT: v_log_f32_e32 v0, v0
-; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log10_f32_afn:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209b
+; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: v_log10_f32_afn:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log10_f32_afn:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_log_f32_e32 v2, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209b
+; SI-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log10_f32_afn:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log10_f32_afn:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_log_f32_e32 v2, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log10_f32_afn:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209b
+; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log10_f32_afn:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209b
+; GFX900-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log10_f32_afn:
+; GFX1100-SDAG: ; %bb.0:
+; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc11a209b, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3e9a209b, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log10_f32_afn:
+; GFX1100-GISEL: ; %bb.0:
+; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc11a209b, vcc_lo
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3e9a209b, v1
+; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log10_f32_afn:
; R600: ; %bb.0:
@@ -3271,20 +3635,111 @@ define float @v_log10_f32_afn_daz(float %in) #0 {
}
define float @v_log10_f32_afn_dynamic(float %in) #1 {
-; GFX689-LABEL: v_log10_f32_afn_dynamic:
-; GFX689: ; %bb.0:
-; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT: v_log_f32_e32 v0, v0
-; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_log10_f32_afn_dynamic:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209b
+; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: v_log10_f32_afn_dynamic:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_log_f32_e32 v0, v0
-; GFX1100-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_log10_f32_afn_dynamic:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_log_f32_e32 v2, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209b
+; SI-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_log10_f32_afn_dynamic:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_log10_f32_afn_dynamic:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_log_f32_e32 v2, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_log10_f32_afn_dynamic:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209b
+; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_log10_f32_afn_dynamic:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209b
+; GFX900-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log10_f32_afn_dynamic:
+; GFX1100-SDAG: ; %bb.0:
+; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc11a209b, vcc_lo
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3e9a209b, v0
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log10_f32_afn_dynamic:
+; GFX1100-GISEL: ; %bb.0:
+; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc11a209b, vcc_lo
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3e9a209b, v1
+; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_log10_f32_afn_dynamic:
; R600: ; %bb.0:
@@ -3300,20 +3755,112 @@ define float @v_log10_f32_afn_dynamic(float %in) #1 {
}
define float @v_fabs_log10_f32_afn(float %in) {
-; GFX689-LABEL: v_fabs_log10_f32_afn:
-; GFX689: ; %bb.0:
-; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX689-NEXT: v_log_f32_e64 v0, |v0|
-; GFX689-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX689-NEXT: s_setpc_b64 s[30:31]
+; SI-SDAG-LABEL: v_fabs_log10_f32_afn:
+; SI-SDAG: ; %bb.0:
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
+; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; SI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209b
+; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
+; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1100-LABEL: v_fabs_log10_f32_afn:
-; GFX1100: ; %bb.0:
-; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: v_log_f32_e64 v0, |v0|
-; GFX1100-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
-; GFX1100-NEXT: s_setpc_b64 s[30:31]
+; SI-GISEL-LABEL: v_fabs_log10_f32_afn:
+; SI-GISEL: ; %bb.0:
+; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-GISEL-NEXT: v_log_f32_e64 v2, |v0|
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209b
+; SI-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
+; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-SDAG-LABEL: v_fabs_log10_f32_afn:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
+; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; VI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2
+; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209b, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-GISEL-LABEL: v_fabs_log10_f32_afn:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-GISEL-NEXT: v_log_f32_e64 v2, |v0|
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209b, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
+; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: v_fabs_log10_f32_afn:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000
+; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
+; GFX900-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209b
+; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_fabs_log10_f32_afn:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_log_f32_e64 v2, |v0|
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc11a209b
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209b
+; GFX900-GISEL-NEXT: v_fma_f32 v0, v2, v1, v0
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_fabs_log10_f32_afn:
+; GFX1100-SDAG: ; %bb.0:
+; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0
+; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc11a209b, s0
+; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3e9a209b, v0
+; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_fabs_log10_f32_afn:
+; GFX1100-GISEL: ; %bb.0:
+; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT: v_log_f32_e64 v1, |v0|
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
+; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 0xc11a209b, s0
+; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v0, 0x3e9a209b, v1
+; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; R600-LABEL: v_fabs_log10_f32_afn:
; R600: ; %bb.0:
More information about the llvm-branch-commits
mailing list