[llvm] fbe4ff8 - AMDGPU: Partially fix not respecting dynamic denormal mode
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 11 12:14:58 PDT 2023
Author: Matt Arsenault
Date: 2023-07-11T15:14:52-04:00
New Revision: fbe4ff8149a2b656a66cc0c64a38a5302c051c6e
URL: https://github.com/llvm/llvm-project/commit/fbe4ff8149a2b656a66cc0c64a38a5302c051c6e
DIFF: https://github.com/llvm/llvm-project/commit/fbe4ff8149a2b656a66cc0c64a38a5302c051c6e.diff
LOG: AMDGPU: Partially fix not respecting dynamic denormal mode
The most notable issue was producing v_mad_f32 in functions with the
dynamic mode, since v_mad_f32 never supports denormals regardless of the
mode. fdiv lowering is still somewhat broken because it involves a mode
switch, and we need to query the original mode in order to restore it.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.h
llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir
llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
llvm/test/CodeGen/AMDGPU/fdiv.ll
llvm/test/CodeGen/AMDGPU/llvm.exp.ll
llvm/test/CodeGen/AMDGPU/llvm.log.ll
llvm/test/CodeGen/AMDGPU/llvm.log10.ll
llvm/test/CodeGen/AMDGPU/v_mac.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 618beb80615229..3abd13c1b24c3f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -99,7 +99,7 @@ class AMDGPUCodeGenPrepareImpl
Module *Mod = nullptr;
const DataLayout *DL = nullptr;
bool HasUnsafeFPMath = false;
- bool HasFP32Denormals = false;
+ bool HasFP32DenormalFlush = false;
bool FlowChanged = false;
DenseMap<const PHINode *, bool> BreakPhiNodesCache;
@@ -793,8 +793,8 @@ static Value *optimizeWithRcp(Value *Num, Value *Den, bool AllowInaccurateRcp,
//
// NOTE: optimizeWithRcp should be tried first because rcp is the preference.
static Value *optimizeWithFDivFast(Value *Num, Value *Den, float ReqdAccuracy,
- bool HasDenormals, IRBuilder<> &Builder,
- Module *Mod) {
+ bool HasFP32DenormalFlush,
+ IRBuilder<> &Builder, Module *Mod) {
// fdiv.fast can achieve 2.5 ULP accuracy.
if (ReqdAccuracy < 2.5f)
return nullptr;
@@ -811,7 +811,7 @@ static Value *optimizeWithFDivFast(Value *Num, Value *Den, float ReqdAccuracy,
}
// fdiv does not support denormals. But 1.0/x is always fine to use it.
- if (HasDenormals && !NumIsOne)
+ if (!HasFP32DenormalFlush && !NumIsOne)
return nullptr;
Function *Decl = Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_fdiv_fast);
@@ -851,7 +851,7 @@ bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) {
// rcp_f16 is accurate to 0.51 ulp.
// rcp_f32 is accurate for !fpmath >= 1.0ulp and denormals are flushed.
// rcp_f64 is never accurate.
- const bool RcpIsAccurate = !HasFP32Denormals && ReqdAccuracy >= 1.0f;
+ const bool RcpIsAccurate = HasFP32DenormalFlush && ReqdAccuracy >= 1.0f;
IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()));
Builder.setFastMathFlags(FMF);
@@ -873,8 +873,8 @@ bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) {
Value *NewElt = optimizeWithRcp(NumEltI, DenEltI, AllowInaccurateRcp,
RcpIsAccurate, Builder, Mod);
if (!NewElt) // Try fdiv.fast.
- NewElt = optimizeWithFDivFast(NumEltI, DenEltI, ReqdAccuracy,
- HasFP32Denormals, Builder, Mod);
+ NewElt = optimizeWithFDivFast(NumEltI, DenEltI, ReqdAccuracy,
+ HasFP32DenormalFlush, Builder, Mod);
if (!NewElt) // Keep the original.
NewElt = Builder.CreateFDiv(NumEltI, DenEltI);
@@ -885,8 +885,8 @@ bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) {
NewFDiv = optimizeWithRcp(Num, Den, AllowInaccurateRcp, RcpIsAccurate,
Builder, Mod);
if (!NewFDiv) { // Try fdiv.fast.
- NewFDiv = optimizeWithFDivFast(Num, Den, ReqdAccuracy, HasFP32Denormals,
- Builder, Mod);
+ NewFDiv = optimizeWithFDivFast(Num, Den, ReqdAccuracy,
+ HasFP32DenormalFlush, Builder, Mod);
}
}
@@ -1832,7 +1832,8 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
Impl.DT = DTWP ? &DTWP->getDomTree() : nullptr;
Impl.HasUnsafeFPMath = hasUnsafeFPMath(F);
SIModeRegisterDefaults Mode(F);
- Impl.HasFP32Denormals = Mode.allFP32Denormals();
+ Impl.HasFP32DenormalFlush =
+ Mode.FP32Denormals == DenormalMode::getPreserveSign();
return Impl.run(F);
}
@@ -1848,7 +1849,8 @@ PreservedAnalyses AMDGPUCodeGenPreparePass::run(Function &F,
Impl.DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
Impl.HasUnsafeFPMath = hasUnsafeFPMath(F);
SIModeRegisterDefaults Mode(F);
- Impl.HasFP32Denormals = Mode.allFP32Denormals();
+ Impl.HasFP32DenormalFlush =
+ Mode.FP32Denormals == DenormalMode::getPreserveSign();
PreservedAnalyses PA = PreservedAnalyses::none();
if (!Impl.FlowChanged)
PA.preserveSet<CFGAnalyses>();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 055cd81fa221b9..d724d244b22ffc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1883,7 +1883,8 @@ SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG,
bool UseFmadFtz = false;
if (Subtarget->isGCN()) {
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- UseFmadFtz = MFI->getMode().allFP32Denormals();
+ UseFmadFtz =
+ MFI->getMode().FP32Denormals != DenormalMode::getPreserveSign();
}
// float fr = mad(fqneg, fb, fa);
@@ -1975,11 +1976,11 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// Compute denominator reciprocal.
- unsigned FMAD = !Subtarget->hasMadMacF32Insts() ?
- (unsigned)ISD::FMA :
- !MFI->getMode().allFP32Denormals() ?
- (unsigned)ISD::FMAD :
- (unsigned)AMDGPUISD::FMAD_FTZ;
+ unsigned FMAD =
+ !Subtarget->hasMadMacF32Insts() ? (unsigned)ISD::FMA
+ : MFI->getMode().FP32Denormals == DenormalMode::getPreserveSign()
+ ? (unsigned)ISD::FMAD
+ : (unsigned)AMDGPUISD::FMAD_FTZ;
SDValue Cvt_Lo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Lo);
SDValue Cvt_Hi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Hi);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 0adab52530840f..2305097e3f94d5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -110,12 +110,12 @@ let GIIgnoreCopies = 1 in
class AMDGPUPatIgnoreCopies<dag pattern, dag result> : AMDGPUPat<pattern, result>;
let RecomputePerFunction = 1 in {
-def FP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
-def FP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP32Denormals()">;
-def FP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
-def NoFP16Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
-def NoFP32Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP32Denormals()">;
-def NoFP64Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
+def FP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals != DenormalMode::getPreserveSign()">;
+def FP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals != DenormalMode::getPreserveSign()">;
+def FP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals != DenormalMode::getPreserveSign()">;
+def NoFP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">;
+def NoFP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals == DenormalMode::getPreserveSign()">;
+def NoFP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">;
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 12b5d9db6264ce..1ba52f3d2d8b74 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -2965,9 +2965,11 @@ bool AMDGPULegalizerInfo::legalizeFMad(
// TODO: Always legal with future ftz flag.
// FIXME: Do we need just output?
- if (Ty == LLT::scalar(32) && !MFI->getMode().allFP32Denormals())
+ if (Ty == LLT::scalar(32) &&
+ MFI->getMode().FP32Denormals == DenormalMode::getPreserveSign())
return true;
- if (Ty == LLT::scalar(16) && !MFI->getMode().allFP64FP16Denormals())
+ if (Ty == LLT::scalar(16) &&
+ MFI->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign())
return true;
MachineIRBuilder HelperBuilder(MI);
@@ -4642,7 +4644,7 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
// FIXME: Doesn't correctly model the FP mode switch, and the FP operations
// aren't modeled as reading it.
- if (!Mode.allFP32Denormals())
+ if (Mode.FP32Denormals != DenormalMode::getIEEE())
toggleSPDenormMode(true, B, ST, Mode);
auto Fma0 = B.buildFMA(S32, NegDivScale0, ApproxRcp, One, Flags);
@@ -4652,7 +4654,9 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
auto Fma3 = B.buildFMA(S32, Fma2, Fma1, Mul, Flags);
auto Fma4 = B.buildFMA(S32, NegDivScale0, Fma3, NumeratorScaled, Flags);
- if (!Mode.allFP32Denormals())
+ // FIXME: This mishandles dynamic denormal mode. We need to query the
+ // current mode and restore the original.
+ if (Mode.FP32Denormals != DenormalMode::getIEEE())
toggleSPDenormMode(false, B, ST, Mode);
auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S32}, false)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 85f84c84e16334..81d083c1c88ad2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -297,8 +297,9 @@ GCNTTIImpl::GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
TLI(ST->getTargetLowering()), CommonTTI(TM, F),
IsGraphics(AMDGPU::isGraphics(F.getCallingConv())) {
SIModeRegisterDefaults Mode(F);
- HasFP32Denormals = Mode.allFP32Denormals();
- HasFP64FP16Denormals = Mode.allFP64FP16Denormals();
+ HasFP32Denormals = Mode.FP32Denormals != DenormalMode::getPreserveSign();
+ HasFP64FP16Denormals =
+ Mode.FP64FP16Denormals != DenormalMode::getPreserveSign();
}
bool GCNTTIImpl::hasBranchDivergence(const Function *F) const {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index a7d03d235ce694..76b51a2da0249f 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -59,14 +59,14 @@ static cl::opt<bool> UseDivergentRegisterIndexing(
cl::desc("Use indirect register addressing for divergent indexes"),
cl::init(false));
-static bool hasFP32Denormals(const MachineFunction &MF) {
+static bool denormalModeIsFlushAllF32(const MachineFunction &MF) {
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- return Info->getMode().allFP32Denormals();
+ return Info->getMode().FP32Denormals == DenormalMode::getPreserveSign();
}
-static bool hasFP64FP16Denormals(const MachineFunction &MF) {
+static bool denormalModeIsFlushAllF64F16(const MachineFunction &MF) {
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- return Info->getMode().allFP64FP16Denormals();
+ return Info->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign();
}
static unsigned findFirstFreeSGPR(CCState &CCInfo) {
@@ -830,10 +830,10 @@ bool SITargetLowering::isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode,
EVT DestVT, EVT SrcVT) const {
return ((Opcode == ISD::FMAD && Subtarget->hasMadMixInsts()) ||
(Opcode == ISD::FMA && Subtarget->hasFmaMixInsts())) &&
- DestVT.getScalarType() == MVT::f32 &&
- SrcVT.getScalarType() == MVT::f16 &&
- // TODO: This probably only requires no input flushing?
- !hasFP32Denormals(DAG.getMachineFunction());
+ DestVT.getScalarType() == MVT::f32 &&
+ SrcVT.getScalarType() == MVT::f16 &&
+ // TODO: This probably only requires no input flushing?
+ denormalModeIsFlushAllF32(DAG.getMachineFunction());
}
bool SITargetLowering::isFPExtFoldable(const MachineInstr &MI, unsigned Opcode,
@@ -843,7 +843,7 @@ bool SITargetLowering::isFPExtFoldable(const MachineInstr &MI, unsigned Opcode,
DestTy.getScalarSizeInBits() == 32 &&
SrcTy.getScalarSizeInBits() == 16 &&
// TODO: This probably only requires no input flushing?
- !hasFP32Denormals(*MI.getMF());
+ denormalModeIsFlushAllF32(*MI.getMF());
}
bool SITargetLowering::isShuffleMaskLegal(ArrayRef<int>, EVT) const {
@@ -4646,7 +4646,7 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
// Otherwise f32 mad is always full rate and returns the same result as
// the separate operations so should be preferred over fma.
// However does not support denormals.
- if (hasFP32Denormals(MF))
+ if (!denormalModeIsFlushAllF32(MF))
return Subtarget->hasFastFMAF32() || Subtarget->hasDLInsts();
// If the subtarget has v_fmac_f32, that's just as good as v_mac_f32.
@@ -4655,7 +4655,7 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
case MVT::f64:
return true;
case MVT::f16:
- return Subtarget->has16BitInsts() && hasFP64FP16Denormals(MF);
+ return Subtarget->has16BitInsts() && !denormalModeIsFlushAllF64F16(MF);
default:
break;
}
@@ -4684,9 +4684,10 @@ bool SITargetLowering::isFMADLegal(const MachineInstr &MI, LLT Ty) const {
return false;
if (Ty.getScalarSizeInBits() == 16)
- return Subtarget->hasMadF16() && !hasFP64FP16Denormals(*MI.getMF());
+ return Subtarget->hasMadF16() && denormalModeIsFlushAllF64F16(*MI.getMF());
if (Ty.getScalarSizeInBits() == 32)
- return Subtarget->hasMadMacF32Insts() && !hasFP32Denormals(*MI.getMF());
+ return Subtarget->hasMadMacF32Insts() &&
+ denormalModeIsFlushAllF32(*MI.getMF());
return false;
}
@@ -4698,10 +4699,10 @@ bool SITargetLowering::isFMADLegal(const SelectionDAG &DAG,
EVT VT = N->getValueType(0);
if (VT == MVT::f32)
return Subtarget->hasMadMacF32Insts() &&
- !hasFP32Denormals(DAG.getMachineFunction());
+ denormalModeIsFlushAllF32(DAG.getMachineFunction());
if (VT == MVT::f16) {
return Subtarget->hasMadF16() &&
- !hasFP64FP16Denormals(DAG.getMachineFunction());
+ denormalModeIsFlushAllF64F16(DAG.getMachineFunction());
}
return false;
@@ -9307,15 +9308,13 @@ SDValue SITargetLowering::lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const {
// Returns immediate value for setting the F32 denorm mode when using the
// S_DENORM_MODE instruction.
-static SDValue getSPDenormModeValue(int SPDenormMode, SelectionDAG &DAG,
- const SDLoc &SL, const GCNSubtarget *ST) {
+static SDValue getSPDenormModeValue(uint32_t SPDenormMode, SelectionDAG &DAG,
+ const SIMachineFunctionInfo *Info,
+ const GCNSubtarget *ST) {
assert(ST->hasDenormModeInst() && "Requires S_DENORM_MODE");
- int DPDenormModeDefault = hasFP64FP16Denormals(DAG.getMachineFunction())
- ? FP_DENORM_FLUSH_NONE
- : FP_DENORM_FLUSH_IN_FLUSH_OUT;
-
- int Mode = SPDenormMode | (DPDenormModeDefault << 2);
- return DAG.getTargetConstant(Mode, SL, MVT::i32);
+ uint32_t DPDenormModeDefault = Info->getMode().fpDenormModeDPValue();
+ uint32_t Mode = SPDenormMode | (DPDenormModeDefault << 2);
+ return DAG.getTargetConstant(Mode, SDLoc(), MVT::i32);
}
SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
@@ -9353,7 +9352,11 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
(1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_);
const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i32);
- const bool HasFP32Denormals = hasFP32Denormals(DAG.getMachineFunction());
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ const DenormalMode DenormMode = Info->getMode().FP32Denormals;
+
+ const bool HasFP32Denormals = DenormMode == DenormalMode::getIEEE();
if (!HasFP32Denormals) {
// Note we can't use the STRICT_FMA/STRICT_FMUL for the non-strict FDIV
@@ -9365,7 +9368,7 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
SDNode *EnableDenorm;
if (Subtarget->hasDenormModeInst()) {
const SDValue EnableDenormValue =
- getSPDenormModeValue(FP_DENORM_FLUSH_NONE, DAG, SL, Subtarget);
+ getSPDenormModeValue(FP_DENORM_FLUSH_NONE, DAG, Info, Subtarget);
EnableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, BindParamVTs,
DAG.getEntryNode(), EnableDenormValue).getNode();
@@ -9405,10 +9408,13 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
NumeratorScaled, Fma3, Flags);
if (!HasFP32Denormals) {
+ // FIXME: This mishandles dynamic denormal mode. We need to query the
+ // current mode and restore the original.
+
SDNode *DisableDenorm;
if (Subtarget->hasDenormModeInst()) {
- const SDValue DisableDenormValue =
- getSPDenormModeValue(FP_DENORM_FLUSH_IN_FLUSH_OUT, DAG, SL, Subtarget);
+ const SDValue DisableDenormValue = getSPDenormModeValue(
+ FP_DENORM_FLUSH_IN_FLUSH_OUT, DAG, Info, Subtarget);
DisableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, MVT::Other,
Fma4.getValue(1), DisableDenormValue,
@@ -11782,10 +11788,11 @@ unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
// Only do this if we are not trying to support denormals. v_mad_f32 does not
// support denormals ever.
- if (((VT == MVT::f32 && !hasFP32Denormals(DAG.getMachineFunction())) ||
- (VT == MVT::f16 && !hasFP64FP16Denormals(DAG.getMachineFunction()) &&
- getSubtarget()->hasMadF16())) &&
- isOperationLegal(ISD::FMAD, VT))
+ if (((VT == MVT::f32 &&
+ denormalModeIsFlushAllF32(DAG.getMachineFunction())) ||
+ (VT == MVT::f16 && Subtarget->hasMadF16() &&
+ denormalModeIsFlushAllF64F16(DAG.getMachineFunction()))) &&
+ isOperationLegal(ISD::FMAD, VT))
return ISD::FMAD;
const TargetOptions &Options = DAG.getTarget().Options;
@@ -13743,10 +13750,10 @@ bool SITargetLowering::denormalsEnabledForType(const SelectionDAG &DAG,
EVT VT) const {
switch (VT.getScalarType().getSimpleVT().SimpleTy) {
case MVT::f32:
- return hasFP32Denormals(DAG.getMachineFunction());
+ return !denormalModeIsFlushAllF32(DAG.getMachineFunction());
case MVT::f64:
case MVT::f16:
- return hasFP64FP16Denormals(DAG.getMachineFunction());
+ return !denormalModeIsFlushAllF64F16(DAG.getMachineFunction());
default:
return false;
}
@@ -13756,10 +13763,10 @@ bool SITargetLowering::denormalsEnabledForType(LLT Ty,
MachineFunction &MF) const {
switch (Ty.getScalarSizeInBits()) {
case 32:
- return hasFP32Denormals(MF);
+ return !denormalModeIsFlushAllF32(MF);
case 64:
case 16:
- return hasFP64FP16Denormals(MF);
+ return !denormalModeIsFlushAllF64F16(MF);
default:
return false;
}
diff --git a/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.h b/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.h
index 6693bd51506003..df2e3f9bff32de 100644
--- a/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.h
+++ b/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.h
@@ -54,14 +54,6 @@ struct SIModeRegisterDefaults {
FP64FP16Denormals == Other.FP64FP16Denormals;
}
- bool allFP32Denormals() const {
- return FP32Denormals == DenormalMode::getIEEE();
- }
-
- bool allFP64FP16Denormals() const {
- return FP64FP16Denormals == DenormalMode::getIEEE();
- }
-
/// Get the encoding value for the FP_DENORM bits of the mode register for the
/// FP32 denormal mode.
uint32_t fpDenormModeSPValue() const {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir
index e496f1823cf149..291f478e08faed 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir
@@ -34,6 +34,7 @@ name: test_f32_add_mul_rhs
machineFunctionInfo:
mode:
fp32-input-denormals: false
+ fp32-output-denormals: false
body: |
bb.1:
liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3
@@ -63,6 +64,7 @@ name: test_f16_f32_add_ext_mul
machineFunctionInfo:
mode:
fp32-input-denormals: false
+ fp32-output-denormals: false
body: |
bb.1:
liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1
@@ -99,6 +101,7 @@ name: test_f16_f32_add_ext_mul_rhs
machineFunctionInfo:
mode:
fp32-input-denormals: false
+ fp32-output-denormals: false
body: |
bb.1:
liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1
@@ -199,6 +202,7 @@ name: test_f16_f32_add_fma_ext_mul
machineFunctionInfo:
mode:
fp32-input-denormals: false
+ fp32-output-denormals: false
body: |
bb.1:
liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5
@@ -241,6 +245,7 @@ name: test_f16_f32_add_ext_fma_mul
machineFunctionInfo:
mode:
fp32-input-denormals: false
+ fp32-output-denormals: false
body: |
bb.1:
liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5
@@ -290,6 +295,7 @@ name: test_f16_f32_add_fma_ext_mul_rhs
machineFunctionInfo:
mode:
fp32-input-denormals: false
+ fp32-output-denormals: false
body: |
bb.1:
liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
@@ -332,6 +338,7 @@ name: test_f16_f32_add_ext_fma_mul_rhs
machineFunctionInfo:
mode:
fp32-input-denormals: false
+ fp32-output-denormals: false
body: |
bb.1:
liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
@@ -381,6 +388,7 @@ name: test_f32_sub_mul
machineFunctionInfo:
mode:
fp32-input-denormals: false
+ fp32-output-denormals: false
body: |
bb.1:
liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3
@@ -411,6 +419,7 @@ name: test_f32_sub_mul_rhs
machineFunctionInfo:
mode:
fp32-input-denormals: false
+ fp32-output-denormals: false
body: |
bb.1:
liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
index f9c98e85ed5ccc..f3e24702732762 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
@@ -4,7 +4,7 @@
; NOOP-LABEL: @noop_fdiv_fpmath(
; NOOP: %md.25ulp = fdiv float %a, %b, !fpmath !0
-define amdgpu_kernel void @noop_fdiv_fpmath(ptr addrspace(1) %out, float %a, float %b) #3 {
+define amdgpu_kernel void @noop_fdiv_fpmath(ptr addrspace(1) %out, float %a, float %b) {
%md.25ulp = fdiv float %a, %b, !fpmath !0
store volatile float %md.25ulp, ptr addrspace(1) %out
ret void
@@ -337,9 +337,24 @@ define amdgpu_kernel void @fdiv_fpmath_f32_denormals(ptr addrspace(1) %out, floa
ret void
}
+; CHECK-LABEL: @rcp_fpmath_dynamic_denorm(
+; CHECK: %md.25ulp = fdiv float 1.000000e+00, %x, !fpmath !2
+define float @rcp_fpmath_dynamic_denorm(float %x) #3 {
+ %md.25ulp = fdiv float 1.0, %x, !fpmath !2
+ ret float %md.25ulp
+}
+
+; CHECK-LABEL: @rcp_dynamic_denorm(
+; CHECK: %md.25ulp = fdiv float 1.000000e+00, %x
+define float @rcp_dynamic_denorm(float %x) #3 {
+ %md.25ulp = fdiv float 1.0, %x
+ ret float %md.25ulp
+}
+
attributes #0 = { nounwind optnone noinline }
attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #2 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }
+attributes #3 = { nounwind "denormal-fp-math-f32"="dynamic,dynamic" }
!0 = !{float 2.500000e+00}
!1 = !{float 5.000000e-01}
diff --git a/llvm/test/CodeGen/AMDGPU/fdiv.ll b/llvm/test/CodeGen/AMDGPU/fdiv.ll
index 0937462373350f..5a6506f31582b2 100644
--- a/llvm/test/CodeGen/AMDGPU/fdiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv.ll
@@ -370,8 +370,20 @@ entry:
ret void
}
+; FUNC-LABEL: {{^}}v_fdiv_f32_dynamic_denorm:
+; PREGFX10: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; GFX10: s_denorm_mode 15
+
+; PREGFX10: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
+; GFX10: s_denorm_mode 12
+define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) #3 {
+ %fdiv = fdiv float %a, %b
+ ret float %fdiv
+}
+
attributes #0 = { nounwind "enable-unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-features"="-flat-for-global" }
attributes #1 = { nounwind "enable-unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-features"="-flat-for-global" }
attributes #2 = { nounwind "enable-unsafe-fp-math"="false" "denormal-fp-math-f32"="ieee,ieee" "target-features"="-flat-for-global" }
+attributes #3 = { nounwind "denormal-fp-math-f32"="dynamic,dynamic" "target-features"="-flat-for-global" }
!0 = !{float 2.500000e+00}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
index a4dbf34746f9f8..a8c1eaf9840151 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
@@ -3989,14 +3989,15 @@ define float @v_exp_f32_afn_dynamic(float %in) #1 {
; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
-; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000
-; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4
-; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1
-; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
+; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
@@ -4012,14 +4013,15 @@ define float @v_exp_f32_afn_dynamic(float %in) #1 {
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
-; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1
-; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2
-; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
@@ -4035,20 +4037,20 @@ define float @v_exp_f32_afn_dynamic(float %in) #1 {
; GFX900-SDAG-LABEL: v_exp_f32_afn_dynamic:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
-; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
-; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1
-; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
-; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
-; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
-; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
-; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
@@ -4062,14 +4064,14 @@ define float @v_exp_f32_afn_dynamic(float %in) #1 {
; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
-; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
-; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
-; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
-; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
@@ -4081,20 +4083,20 @@ define float @v_exp_f32_afn_dynamic(float %in) #1 {
; SI-SDAG-LABEL: v_exp_f32_afn_dynamic:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
-; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
-; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
-; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
-; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
-; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
-; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
-; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
@@ -4108,14 +4110,14 @@ define float @v_exp_f32_afn_dynamic(float %in) #1 {
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
-; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1
; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
-; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
-; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
-; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
@@ -4306,14 +4308,15 @@ define float @v_exp_f32_daz(float %in) #0 {
; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
-; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000
-; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4
-; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1
-; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
+; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
@@ -4329,14 +4332,15 @@ define float @v_exp_f32_daz(float %in) #0 {
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
-; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1
-; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2
-; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
@@ -4352,20 +4356,20 @@ define float @v_exp_f32_daz(float %in) #0 {
; GFX900-SDAG-LABEL: v_exp_f32_daz:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
-; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
-; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1
-; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
-; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
-; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
-; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
-; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
@@ -4379,14 +4383,14 @@ define float @v_exp_f32_daz(float %in) #0 {
; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
-; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
-; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
-; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
-; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
@@ -4398,20 +4402,20 @@ define float @v_exp_f32_daz(float %in) #0 {
; SI-SDAG-LABEL: v_exp_f32_daz:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
-; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
-; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
-; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
-; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
-; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
-; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
-; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
@@ -4425,14 +4429,14 @@ define float @v_exp_f32_daz(float %in) #0 {
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
-; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1
; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
-; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
-; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
-; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
@@ -4620,14 +4624,15 @@ define float @v_exp_f32_nnan_daz(float %in) #0 {
; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
-; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000
-; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4
-; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1
-; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
+; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
@@ -4643,14 +4648,15 @@ define float @v_exp_f32_nnan_daz(float %in) #0 {
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
-; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1
-; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2
-; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
@@ -4666,20 +4672,20 @@ define float @v_exp_f32_nnan_daz(float %in) #0 {
; GFX900-SDAG-LABEL: v_exp_f32_nnan_daz:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
-; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
-; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1
-; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
-; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
-; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
-; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
-; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
@@ -4693,14 +4699,14 @@ define float @v_exp_f32_nnan_daz(float %in) #0 {
; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
-; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
-; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
-; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
-; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
@@ -4712,20 +4718,20 @@ define float @v_exp_f32_nnan_daz(float %in) #0 {
; SI-SDAG-LABEL: v_exp_f32_nnan_daz:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
-; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
-; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
-; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
-; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
-; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
-; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
-; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
@@ -4739,14 +4745,14 @@ define float @v_exp_f32_nnan_daz(float %in) #0 {
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
-; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1
; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
-; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
-; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
-; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
@@ -4776,14 +4782,15 @@ define float @v_exp_f32_nnan_dynamic(float %in) #1 {
; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
-; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000
-; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4
-; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1
-; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
+; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
@@ -4799,14 +4806,15 @@ define float @v_exp_f32_nnan_dynamic(float %in) #1 {
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
-; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1
-; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2
-; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
@@ -4822,20 +4830,20 @@ define float @v_exp_f32_nnan_dynamic(float %in) #1 {
; GFX900-SDAG-LABEL: v_exp_f32_nnan_dynamic:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
-; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
-; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1
-; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
-; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
-; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
-; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
-; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
@@ -4849,14 +4857,14 @@ define float @v_exp_f32_nnan_dynamic(float %in) #1 {
; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
-; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
-; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
-; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
-; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
@@ -4868,20 +4876,20 @@ define float @v_exp_f32_nnan_dynamic(float %in) #1 {
; SI-SDAG-LABEL: v_exp_f32_nnan_dynamic:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
-; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
-; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
-; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
-; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
-; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
-; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
-; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
@@ -4895,14 +4903,14 @@ define float @v_exp_f32_nnan_dynamic(float %in) #1 {
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
-; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1
; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
-; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
-; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
-; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
@@ -4932,14 +4940,15 @@ define float @v_exp_f32_ninf_daz(float %in) #0 {
; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
-; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000
-; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4
-; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1
-; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
+; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
@@ -4951,14 +4960,15 @@ define float @v_exp_f32_ninf_daz(float %in) #0 {
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
-; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1
-; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2
-; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
@@ -4970,19 +4980,19 @@ define float @v_exp_f32_ninf_daz(float %in) #0 {
; GFX900-SDAG-LABEL: v_exp_f32_ninf_daz:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
-; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
-; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1
-; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
-; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
-; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
-; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
@@ -4993,13 +5003,13 @@ define float @v_exp_f32_ninf_daz(float %in) #0 {
; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
-; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
-; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
-; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2
-; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
@@ -5008,19 +5018,19 @@ define float @v_exp_f32_ninf_daz(float %in) #0 {
; SI-SDAG-LABEL: v_exp_f32_ninf_daz:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
-; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
-; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
-; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
-; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
-; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
-; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
@@ -5031,13 +5041,13 @@ define float @v_exp_f32_ninf_daz(float %in) #0 {
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
-; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1
; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
-; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
-; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
-; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
@@ -5064,14 +5074,15 @@ define float @v_exp_f32_ninf_dynamic(float %in) #1 {
; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
-; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000
-; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4
-; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1
-; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
+; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
@@ -5083,14 +5094,15 @@ define float @v_exp_f32_ninf_dynamic(float %in) #1 {
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
-; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1
-; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2
-; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
@@ -5102,19 +5114,19 @@ define float @v_exp_f32_ninf_dynamic(float %in) #1 {
; GFX900-SDAG-LABEL: v_exp_f32_ninf_dynamic:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
-; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
-; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1
-; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
-; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
-; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
-; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
@@ -5125,13 +5137,13 @@ define float @v_exp_f32_ninf_dynamic(float %in) #1 {
; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
-; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
-; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
-; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2
-; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
@@ -5140,19 +5152,19 @@ define float @v_exp_f32_ninf_dynamic(float %in) #1 {
; SI-SDAG-LABEL: v_exp_f32_ninf_dynamic:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
-; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
-; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
-; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
-; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
-; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
-; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
@@ -5163,13 +5175,13 @@ define float @v_exp_f32_ninf_dynamic(float %in) #1 {
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
-; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1
; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
-; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
-; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
-; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
@@ -5330,14 +5342,15 @@ define float @v_exp_f32_nnan_ninf_daz(float %in) #0 {
; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
-; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000
-; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4
-; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1
-; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
+; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
@@ -5349,14 +5362,15 @@ define float @v_exp_f32_nnan_ninf_daz(float %in) #0 {
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
-; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1
-; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2
-; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
@@ -5368,19 +5382,19 @@ define float @v_exp_f32_nnan_ninf_daz(float %in) #0 {
; GFX900-SDAG-LABEL: v_exp_f32_nnan_ninf_daz:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
-; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
-; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1
-; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
-; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
-; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
-; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
@@ -5391,13 +5405,13 @@ define float @v_exp_f32_nnan_ninf_daz(float %in) #0 {
; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
-; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
-; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
-; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2
-; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
@@ -5406,19 +5420,19 @@ define float @v_exp_f32_nnan_ninf_daz(float %in) #0 {
; SI-SDAG-LABEL: v_exp_f32_nnan_ninf_daz:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
-; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
-; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
-; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
-; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
-; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
-; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
@@ -5429,13 +5443,13 @@ define float @v_exp_f32_nnan_ninf_daz(float %in) #0 {
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
-; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1
; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
-; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
-; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
-; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
@@ -5462,14 +5476,15 @@ define float @v_exp_f32_nnan_ninf_dynamic(float %in) #1 {
; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
-; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000
-; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4
-; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1
-; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
+; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
@@ -5481,14 +5496,15 @@ define float @v_exp_f32_nnan_ninf_dynamic(float %in) #1 {
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
-; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1
-; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2
-; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
@@ -5500,19 +5516,19 @@ define float @v_exp_f32_nnan_ninf_dynamic(float %in) #1 {
; GFX900-SDAG-LABEL: v_exp_f32_nnan_ninf_dynamic:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
-; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
-; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1
-; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
-; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
-; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
-; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
-; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
@@ -5523,13 +5539,13 @@ define float @v_exp_f32_nnan_ninf_dynamic(float %in) #1 {
; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
-; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
-; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
-; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2
-; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
@@ -5538,19 +5554,19 @@ define float @v_exp_f32_nnan_ninf_dynamic(float %in) #1 {
; SI-SDAG-LABEL: v_exp_f32_nnan_ninf_dynamic:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
-; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
-; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
-; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
-; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
-; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
-; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
-; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
@@ -5561,13 +5577,13 @@ define float @v_exp_f32_nnan_ninf_dynamic(float %in) #1 {
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
-; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1
; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
-; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
-; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
-; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
@@ -5622,14 +5638,15 @@ define float @v_exp_f32_dynamic_mode(float %in) #1 {
; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1
; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1
; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4
-; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000
-; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x3fb8a000, v4
-; VI-SDAG-NEXT: v_mad_f32 v3, v1, s4, -v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v5, 0x39a3b295, v1
-; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4
+; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1
; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
@@ -5645,14 +5662,15 @@ define float @v_exp_f32_dynamic_mode(float %in) #1 {
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
-; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1
-; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2
-; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
@@ -5668,20 +5686,20 @@ define float @v_exp_f32_dynamic_mode(float %in) #1 {
; GFX900-SDAG-LABEL: v_exp_f32_dynamic_mode:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
-; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
-; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, v1
-; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
-; GFX900-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
-; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2
-; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
-; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
@@ -5695,14 +5713,14 @@ define float @v_exp_f32_dynamic_mode(float %in) #1 {
; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
-; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; GFX900-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
-; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
-; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
-; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v2, v1
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
@@ -5714,20 +5732,20 @@ define float @v_exp_f32_dynamic_mode(float %in) #1 {
; SI-SDAG-LABEL: v_exp_f32_dynamic_mode:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
-; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
-; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
-; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
-; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
-; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
-; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
-; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
+; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
+; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
-; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
@@ -5741,14 +5759,14 @@ define float @v_exp_f32_dynamic_mode(float %in) #1 {
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f
-; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v1
; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v1
-; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
-; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2
+; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v1
+; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
+; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
-; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
@@ -6499,19 +6517,20 @@ define float @v_exp_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
-; VI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8a000
+; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2
-; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v3
-; VI-SDAG-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1
-; VI-SDAG-NEXT: v_mad_f32 v1, v1, s4, -v2
-; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4
-; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x39a3b295, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v1
+; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1
-; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
+; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
@@ -6526,16 +6545,17 @@ define float @v_exp_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
-; VI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8a000
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x39a3b295, v1
-; VI-GISEL-NEXT: v_mad_f32 v1, v1, s4, -v2
-; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
@@ -6552,50 +6572,50 @@ define float @v_exp_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
-; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v0
+; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v1
-; GFX900-SDAG-NEXT: v_fma_f32 v3, v1, s4, -v2
-; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v2
-; GFX900-SDAG-NEXT: v_fma_f32 v3, v1, s5, v3
-; GFX900-SDAG-NEXT: v_mad_mix_f32 v0, v0, s4, -v2 op_sel_hi:[1,0,0]
-; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v0, v3
-; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2
-; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
+; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
-; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v1
+; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
-; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v1
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-GISEL-LABEL: v_exp_f32_from_fpext_math_f16_daz:
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
-; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0
+; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8aa3b, v1
-; GFX900-GISEL-NEXT: v_fma_f32 v4, v1, s4, -v3
-; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v3
-; GFX900-GISEL-NEXT: v_fma_f32 v2, v1, v2, v4
-; GFX900-GISEL-NEXT: v_mad_mix_f32 v0, v0, s4, -v3 op_sel_hi:[1,0,0]
-; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v2
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
+; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3
-; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
-; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v2
+; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v1, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SI-SDAG-LABEL: v_exp_f32_from_fpext_math_f16_daz:
@@ -6604,20 +6624,20 @@ define float @v_exp_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f
; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0
; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
-; SI-SDAG-NEXT: v_rndne_f32_e32 v1, v1
-; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
-; SI-SDAG-NEXT: v_mad_f32 v3, v0, s4, -v1
-; SI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
-; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2
-; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1
+; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f
+; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
+; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
+; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1
+; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3
; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0
; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0
; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218
-; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v2, v1
+; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0
@@ -6630,7 +6650,6 @@ define float @v_exp_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-GISEL-NEXT: s_mov_b32 s4, 0x3fb8aa3b
-; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218
; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000
; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
@@ -6638,15 +6657,16 @@ define float @v_exp_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0
; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v2
-; SI-GISEL-NEXT: v_rndne_f32_e32 v2, v2
+; SI-GISEL-NEXT: v_rndne_f32_e32 v4, v2
; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
-; SI-GISEL-NEXT: v_mad_f32 v3, v0, s4, -v2
-; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
-; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
+; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v4
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1
+; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4
; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
-; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
+; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3
; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log.ll b/llvm/test/CodeGen/AMDGPU/llvm.log.ll
index f58df9274dac3d..9b2e878a96a23c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log.ll
@@ -3330,9 +3330,9 @@ define float @v_log_f32_daz(float %in) #0 {
; SI-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf
; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1
-; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -3345,9 +3345,9 @@ define float @v_log_f32_daz(float %in) #0 {
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
-; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3
-; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
-; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -3358,28 +3358,34 @@ define float @v_log_f32_daz(float %in) #0 {
; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
-; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1
+; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v3
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; VI-GISEL-LABEL: v_log_f32_daz:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
-; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
-; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v3
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3805fdf4, v2
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3f317000, v3
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3f317000, v2
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_log_f32_daz:
@@ -3390,9 +3396,9 @@ define float @v_log_f32_daz(float %in) #0 {
; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf
; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1
-; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -3405,9 +3411,9 @@ define float @v_log_f32_daz(float %in) #0 {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
-; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3
-; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
-; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -3666,9 +3672,9 @@ define float @v_log_f32_nnan_daz(float %in) #0 {
; SI-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf
; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1
-; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -3681,9 +3687,9 @@ define float @v_log_f32_nnan_daz(float %in) #0 {
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
-; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3
-; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
-; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -3694,28 +3700,34 @@ define float @v_log_f32_nnan_daz(float %in) #0 {
; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
-; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1
+; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v3
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; VI-GISEL-LABEL: v_log_f32_nnan_daz:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
-; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
-; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v3
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3805fdf4, v2
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3f317000, v3
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3f317000, v2
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_log_f32_nnan_daz:
@@ -3726,9 +3738,9 @@ define float @v_log_f32_nnan_daz(float %in) #0 {
; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf
; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1
-; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -3741,9 +3753,9 @@ define float @v_log_f32_nnan_daz(float %in) #0 {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
-; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3
-; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
-; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -3803,11 +3815,11 @@ define float @v_log_f32_nnan_dynamic(float %in) #1 {
; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
-; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
@@ -3825,12 +3837,12 @@ define float @v_log_f32_nnan_dynamic(float %in) #1 {
; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
-; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1
-; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
@@ -3850,13 +3862,16 @@ define float @v_log_f32_nnan_dynamic(float %in) #1 {
; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
-; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5]
; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -3873,13 +3888,16 @@ define float @v_log_f32_nnan_dynamic(float %in) #1 {
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
-; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1
-; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2
-; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5]
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -3895,11 +3913,11 @@ define float @v_log_f32_nnan_dynamic(float %in) #1 {
; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
-; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
@@ -3917,12 +3935,12 @@ define float @v_log_f32_nnan_dynamic(float %in) #1 {
; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
-; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1
-; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
@@ -3996,9 +4014,9 @@ define float @v_log_f32_ninf_daz(float %in) #0 {
; SI-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf
; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1
-; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -4011,9 +4029,9 @@ define float @v_log_f32_ninf_daz(float %in) #0 {
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
-; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3
-; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
-; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -4024,28 +4042,34 @@ define float @v_log_f32_ninf_daz(float %in) #0 {
; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
-; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1
+; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v3
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; VI-GISEL-LABEL: v_log_f32_ninf_daz:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
-; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
-; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v3
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3805fdf4, v2
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3f317000, v3
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3f317000, v2
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_log_f32_ninf_daz:
@@ -4056,9 +4080,9 @@ define float @v_log_f32_ninf_daz(float %in) #0 {
; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf
; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1
-; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -4071,9 +4095,9 @@ define float @v_log_f32_ninf_daz(float %in) #0 {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3377d1cf
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
-; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3
-; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
-; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -4133,11 +4157,11 @@ define float @v_log_f32_ninf_dynamic(float %in) #1 {
; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
-; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
@@ -4155,12 +4179,12 @@ define float @v_log_f32_ninf_dynamic(float %in) #1 {
; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
-; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1
-; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
@@ -4180,13 +4204,16 @@ define float @v_log_f32_ninf_dynamic(float %in) #1 {
; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
-; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5]
; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -4203,13 +4230,16 @@ define float @v_log_f32_ninf_dynamic(float %in) #1 {
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
-; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1
-; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2
-; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5]
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -4225,11 +4255,11 @@ define float @v_log_f32_ninf_dynamic(float %in) #1 {
; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
-; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
@@ -4247,12 +4277,12 @@ define float @v_log_f32_ninf_dynamic(float %in) #1 {
; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
-; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1
-; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
@@ -4499,25 +4529,25 @@ define float @v_log_f32_nnan_ninf_daz(float %in) #0 {
; SI-SDAG-LABEL: v_log_f32_nnan_ninf_daz:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: v_log_f32_e32 v1, v0
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317217, v1
-; SI-SDAG-NEXT: v_fma_f32 v0, v1, s4, -v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
-; SI-SDAG-NEXT: v_fma_f32 v0, v1, s4, v0
-; SI-SDAG-NEXT: v_mac_f32_e32 v0, 0x3f317217, v1
+; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v2
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v1, v0
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log_f32_nnan_ninf_daz:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_log_f32_e32 v1, v0
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317217, v1
-; SI-GISEL-NEXT: v_fma_f32 v0, v1, s4, -v0
-; SI-GISEL-NEXT: v_fma_f32 v0, v1, v2, v0
-; SI-GISEL-NEXT: v_mac_f32_e32 v0, 0x3f317217, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v0, v0, v3, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_log_f32_nnan_ninf_daz:
@@ -4525,35 +4555,38 @@ define float @v_log_f32_nnan_ninf_daz(float %in) #0 {
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_log_f32_e32 v0, v0
; VI-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
-; VI-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-NEXT: v_mul_f32_e32 v0, 0x3805fdf4, v2
-; VI-NEXT: v_mac_f32_e32 v0, 0x3805fdf4, v1
-; VI-NEXT: v_mac_f32_e32 v0, 0x3f317000, v2
-; VI-NEXT: v_mac_f32_e32 v0, 0x3f317000, v1
+; VI-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v1
+; VI-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v0
+; VI-NEXT: v_mul_f32_e32 v0, 0x3f317000, v0
+; VI-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-NEXT: v_add_f32_e32 v0, v0, v2
+; VI-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-NEXT: v_add_f32_e32 v0, v1, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_log_f32_nnan_ninf_daz:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT: v_log_f32_e32 v1, v0
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3f317217, v1
-; GFX900-SDAG-NEXT: v_fma_f32 v0, v1, s4, -v0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
-; GFX900-SDAG-NEXT: v_fma_f32 v0, v1, s4, v0
-; GFX900-SDAG-NEXT: v_mac_f32_e32 v0, 0x3f317217, v1
+; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v2
+; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v1, v0
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-GISEL-LABEL: v_log_f32_nnan_ninf_daz:
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v0
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317217, v1
-; GFX900-GISEL-NEXT: v_fma_f32 v0, v1, s4, -v0
-; GFX900-GISEL-NEXT: v_fma_f32 v0, v1, v2, v0
-; GFX900-GISEL-NEXT: v_mac_f32_e32 v0, 0x3f317217, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v0, v0, v3, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log_f32_nnan_ninf_daz:
@@ -4607,13 +4640,13 @@ define float @v_log_f32_nnan_ninf_dynamic(float %in) #1 {
; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
-; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
-; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x41b17218
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
-; SI-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0
+; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v2
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v1, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log_f32_nnan_ninf_dynamic:
@@ -4626,14 +4659,14 @@ define float @v_log_f32_nnan_ninf_dynamic(float %in) #1 {
; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
-; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1
-; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
-; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x41b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
-; SI-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v0, v0, v3, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_log_f32_nnan_ninf_dynamic:
@@ -4647,13 +4680,16 @@ define float @v_log_f32_nnan_ninf_dynamic(float %in) #1 {
; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v0
-; VI-SDAG-NEXT: v_mac_f32_e32 v2, 0x3805fdf4, v1
-; VI-SDAG-NEXT: v_mac_f32_e32 v2, 0x3f317000, v0
-; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x41b17218
-; VI-SDAG-NEXT: v_mac_f32_e32 v2, 0x3f317000, v1
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
-; VI-SDAG-NEXT: v_sub_f32_e32 v0, v2, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317000, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3805fdf4, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v0, v3, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v0, v2, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v0, v1, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; VI-GISEL-LABEL: v_log_f32_nnan_ninf_dynamic:
@@ -4668,12 +4704,15 @@ define float @v_log_f32_nnan_ninf_dynamic(float %in) #1 {
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v0
-; VI-GISEL-NEXT: v_mac_f32_e32 v2, 0x3805fdf4, v1
-; VI-GISEL-NEXT: v_mac_f32_e32 v2, 0x3f317000, v0
-; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x41b17218
-; VI-GISEL-NEXT: v_mac_f32_e32 v2, 0x3f317000, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
-; VI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317000, v0
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_log_f32_nnan_ninf_dynamic:
@@ -4687,13 +4726,13 @@ define float @v_log_f32_nnan_ninf_dynamic(float %in) #1 {
; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
-; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x41b17218
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
-; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v2
+; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v1, v0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-GISEL-LABEL: v_log_f32_nnan_ninf_dynamic:
@@ -4706,14 +4745,14 @@ define float @v_log_f32_nnan_ninf_dynamic(float %in) #1 {
; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
-; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1
-; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
-; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x41b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v0, v0, v3, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log_f32_nnan_ninf_dynamic:
@@ -4808,11 +4847,11 @@ define float @v_log_f32_dynamic_mode(float %in) #1 {
; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
-; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
@@ -4830,12 +4869,12 @@ define float @v_log_f32_dynamic_mode(float %in) #1 {
; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
-; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1
-; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
@@ -4855,13 +4894,16 @@ define float @v_log_f32_dynamic_mode(float %in) #1 {
; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
-; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5]
; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -4878,13 +4920,16 @@ define float @v_log_f32_dynamic_mode(float %in) #1 {
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2
-; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3805fdf4, v1
-; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3f317000, v2
-; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3f317000, v1
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5]
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -4900,11 +4945,11 @@ define float @v_log_f32_dynamic_mode(float %in) #1 {
; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217
; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
-; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x41b17218
@@ -4922,12 +4967,12 @@ define float @v_log_f32_dynamic_mode(float %in) #1 {
; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
-; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1
-; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3f317217, v0
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
index 432d407974f253..d622e654299519 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
@@ -3330,9 +3330,9 @@ define float @v_log10_f32_daz(float %in) #0 {
; SI-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf
; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1
-; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -3345,9 +3345,9 @@ define float @v_log10_f32_daz(float %in) #0 {
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0
-; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3
-; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
-; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -3358,28 +3358,34 @@ define float @v_log10_f32_daz(float %in) #0 {
; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
-; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v3
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; VI-GISEL-LABEL: v_log10_f32_daz:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
-; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
-; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v3
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x369a84fb, v2
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3e9a2000, v3
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3e9a2000, v2
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_log10_f32_daz:
@@ -3390,9 +3396,9 @@ define float @v_log10_f32_daz(float %in) #0 {
; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf
; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1
-; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -3405,9 +3411,9 @@ define float @v_log10_f32_daz(float %in) #0 {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0
-; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3
-; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
-; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -3666,9 +3672,9 @@ define float @v_log10_f32_nnan_daz(float %in) #0 {
; SI-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf
; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1
-; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -3681,9 +3687,9 @@ define float @v_log10_f32_nnan_daz(float %in) #0 {
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0
-; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3
-; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
-; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -3694,28 +3700,34 @@ define float @v_log10_f32_nnan_daz(float %in) #0 {
; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
-; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v3
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; VI-GISEL-LABEL: v_log10_f32_nnan_daz:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
-; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
-; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v3
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x369a84fb, v2
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3e9a2000, v3
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3e9a2000, v2
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_log10_f32_nnan_daz:
@@ -3726,9 +3738,9 @@ define float @v_log10_f32_nnan_daz(float %in) #0 {
; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf
; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1
-; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -3741,9 +3753,9 @@ define float @v_log10_f32_nnan_daz(float %in) #0 {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0
-; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3
-; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
-; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -3803,11 +3815,11 @@ define float @v_log10_f32_nnan_dynamic(float %in) #1 {
; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
-; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
@@ -3825,12 +3837,12 @@ define float @v_log10_f32_nnan_dynamic(float %in) #1 {
; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
-; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1
-; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
@@ -3850,13 +3862,16 @@ define float @v_log10_f32_nnan_dynamic(float %in) #1 {
; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
-; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5]
; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -3873,13 +3888,16 @@ define float @v_log10_f32_nnan_dynamic(float %in) #1 {
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
-; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1
-; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2
-; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5]
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -3895,11 +3913,11 @@ define float @v_log10_f32_nnan_dynamic(float %in) #1 {
; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
-; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
@@ -3917,12 +3935,12 @@ define float @v_log10_f32_nnan_dynamic(float %in) #1 {
; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
-; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1
-; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
@@ -3996,9 +4014,9 @@ define float @v_log10_f32_ninf_daz(float %in) #0 {
; SI-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf
; SI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1
-; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -4011,9 +4029,9 @@ define float @v_log10_f32_ninf_daz(float %in) #0 {
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0
-; SI-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3
-; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
-; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
+; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -4024,28 +4042,34 @@ define float @v_log10_f32_ninf_daz(float %in) #0 {
; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
-; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v3
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; VI-GISEL-LABEL: v_log10_f32_ninf_daz:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
-; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
-; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2
-; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v3
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x369a84fb, v2
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3e9a2000, v3
-; VI-GISEL-NEXT: v_mac_f32_e32 v4, 0x3e9a2000, v2
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_log10_f32_ninf_daz:
@@ -4056,9 +4080,9 @@ define float @v_log10_f32_ninf_daz(float %in) #0 {
; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf
; GFX900-SDAG-NEXT: s_mov_b32 s6, 0x7f800000
; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s5, v1
-; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6
; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -4071,9 +4095,9 @@ define float @v_log10_f32_ninf_daz(float %in) #0 {
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3284fbcf
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0
-; GFX900-GISEL-NEXT: v_fma_f32 v3, v0, s4, -v3
-; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v3
-; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
+; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, v4
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -4133,11 +4157,11 @@ define float @v_log10_f32_ninf_dynamic(float %in) #1 {
; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
-; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
@@ -4155,12 +4179,12 @@ define float @v_log10_f32_ninf_dynamic(float %in) #1 {
; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
-; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1
-; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
@@ -4180,13 +4204,16 @@ define float @v_log10_f32_ninf_dynamic(float %in) #1 {
; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
-; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5]
; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -4203,13 +4230,16 @@ define float @v_log10_f32_ninf_dynamic(float %in) #1 {
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
-; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1
-; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2
-; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5]
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -4225,11 +4255,11 @@ define float @v_log10_f32_ninf_dynamic(float %in) #1 {
; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
-; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
@@ -4247,12 +4277,12 @@ define float @v_log10_f32_ninf_dynamic(float %in) #1 {
; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
-; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1
-; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
@@ -4499,25 +4529,25 @@ define float @v_log10_f32_nnan_ninf_daz(float %in) #0 {
; SI-SDAG-LABEL: v_log10_f32_nnan_ninf_daz:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-SDAG-NEXT: v_log_f32_e32 v1, v0
+; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
-; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209a, v1
-; SI-SDAG-NEXT: v_fma_f32 v0, v1, s4, -v0
+; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
-; SI-SDAG-NEXT: v_fma_f32 v0, v1, s4, v0
-; SI-SDAG-NEXT: v_mac_f32_e32 v0, 0x3e9a209a, v1
+; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v2
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v1, v0
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log10_f32_nnan_ninf_daz:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_log_f32_e32 v1, v0
+; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf
-; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209a, v1
-; SI-GISEL-NEXT: v_fma_f32 v0, v1, s4, -v0
-; SI-GISEL-NEXT: v_fma_f32 v0, v1, v2, v0
-; SI-GISEL-NEXT: v_mac_f32_e32 v0, 0x3e9a209a, v1
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
+; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v0, v0, v3, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_log10_f32_nnan_ninf_daz:
@@ -4525,35 +4555,38 @@ define float @v_log10_f32_nnan_ninf_daz(float %in) #0 {
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_log_f32_e32 v0, v0
; VI-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
-; VI-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-NEXT: v_mul_f32_e32 v0, 0x369a84fb, v2
-; VI-NEXT: v_mac_f32_e32 v0, 0x369a84fb, v1
-; VI-NEXT: v_mac_f32_e32 v0, 0x3e9a2000, v2
-; VI-NEXT: v_mac_f32_e32 v0, 0x3e9a2000, v1
+; VI-NEXT: v_sub_f32_e32 v0, v0, v1
+; VI-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v1
+; VI-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v0
+; VI-NEXT: v_mul_f32_e32 v0, 0x3e9a2000, v0
+; VI-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-NEXT: v_add_f32_e32 v0, v0, v2
+; VI-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-NEXT: v_add_f32_e32 v0, v1, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_log10_f32_nnan_ninf_daz:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-SDAG-NEXT: v_log_f32_e32 v1, v0
+; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
-; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3e9a209a, v1
-; GFX900-SDAG-NEXT: v_fma_f32 v0, v1, s4, -v0
+; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
-; GFX900-SDAG-NEXT: v_fma_f32 v0, v1, s4, v0
-; GFX900-SDAG-NEXT: v_mac_f32_e32 v0, 0x3e9a209a, v1
+; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v2
+; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v1, v0
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-GISEL-LABEL: v_log10_f32_nnan_ninf_daz:
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v0
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a209a, v1
-; GFX900-GISEL-NEXT: v_fma_f32 v0, v1, s4, -v0
-; GFX900-GISEL-NEXT: v_fma_f32 v0, v1, v2, v0
-; GFX900-GISEL-NEXT: v_mac_f32_e32 v0, 0x3e9a209a, v1
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v0, v0, v3, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log10_f32_nnan_ninf_daz:
@@ -4607,13 +4640,13 @@ define float @v_log10_f32_nnan_ninf_dynamic(float %in) #1 {
; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
-; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
-; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x411a209b
-; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
-; SI-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0
+; SI-SDAG-NEXT: v_fma_f32 v0, v0, s4, v2
+; SI-SDAG-NEXT: v_add_f32_e32 v0, v1, v0
+; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_log10_f32_nnan_ninf_dynamic:
@@ -4626,14 +4659,14 @@ define float @v_log10_f32_nnan_ninf_dynamic(float %in) #1 {
; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
-; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1
-; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
-; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
-; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x411a209b
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
-; SI-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v0, v0, v3, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; VI-SDAG-LABEL: v_log10_f32_nnan_ninf_dynamic:
@@ -4647,13 +4680,16 @@ define float @v_log10_f32_nnan_ninf_dynamic(float %in) #1 {
; VI-SDAG-NEXT: v_log_f32_e32 v0, v0
; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v0
-; VI-SDAG-NEXT: v_mac_f32_e32 v2, 0x369a84fb, v1
-; VI-SDAG-NEXT: v_mac_f32_e32 v2, 0x3e9a2000, v0
-; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x411a209b
-; VI-SDAG-NEXT: v_mac_f32_e32 v2, 0x3e9a2000, v1
-; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
-; VI-SDAG-NEXT: v_sub_f32_e32 v0, v2, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x369a84fb, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v0, v3, v0
+; VI-SDAG-NEXT: v_add_f32_e32 v0, v2, v0
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v0, v1, v0
+; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; VI-GISEL-LABEL: v_log10_f32_nnan_ninf_dynamic:
@@ -4668,12 +4704,15 @@ define float @v_log10_f32_nnan_ninf_dynamic(float %in) #1 {
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v0
-; VI-GISEL-NEXT: v_mac_f32_e32 v2, 0x369a84fb, v1
-; VI-GISEL-NEXT: v_mac_f32_e32 v2, 0x3e9a2000, v0
-; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x411a209b
-; VI-GISEL-NEXT: v_mac_f32_e32 v2, 0x3e9a2000, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
-; VI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a2000, v0
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-SDAG-LABEL: v_log10_f32_nnan_ninf_dynamic:
@@ -4687,13 +4726,13 @@ define float @v_log10_f32_nnan_ninf_dynamic(float %in) #1 {
; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
-; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
-; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x411a209b
-; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
-; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0
+; GFX900-SDAG-NEXT: v_fma_f32 v0, v0, s4, v2
+; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v1, v0
+; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-GISEL-LABEL: v_log10_f32_nnan_ninf_dynamic:
@@ -4706,14 +4745,14 @@ define float @v_log10_f32_nnan_ninf_dynamic(float %in) #1 {
; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
-; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1
-; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
-; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x411a209b
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
-; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v0, v0, v3, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: v_log10_f32_nnan_ninf_dynamic:
@@ -4808,11 +4847,11 @@ define float @v_log10_f32_dynamic_mode(float %in) #1 {
; SI-SDAG-NEXT: v_log_f32_e32 v0, v0
; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
-; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
-; SI-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
@@ -4830,12 +4869,12 @@ define float @v_log10_f32_dynamic_mode(float %in) #1 {
; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
-; SI-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1
-; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; SI-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
@@ -4855,13 +4894,16 @@ define float @v_log10_f32_dynamic_mode(float %in) #1 {
; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2
-; VI-SDAG-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v4, v2
+; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2
+; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
-; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5]
; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -4878,13 +4920,16 @@ define float @v_log10_f32_dynamic_mode(float %in) #1 {
; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2
-; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x369a84fb, v1
-; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v2
-; VI-GISEL-NEXT: v_mac_f32_e32 v3, 0x3e9a2000, v1
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v1
+; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2
+; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
+; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5]
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -4900,11 +4945,11 @@ define float @v_log10_f32_dynamic_mode(float %in) #1 {
; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a
; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf
-; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1
+; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000
-; GFX900-SDAG-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
+; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x411a209b
@@ -4922,12 +4967,12 @@ define float @v_log10_f32_dynamic_mode(float %in) #1 {
; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
-; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, s4, -v1
-; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; GFX900-GISEL-NEXT: v_mac_f32_e32 v1, 0x3e9a209a, v0
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
diff --git a/llvm/test/CodeGen/AMDGPU/v_mac.ll b/llvm/test/CodeGen/AMDGPU/v_mac.ll
index 879e902594bc83..3df7f3c26aad82 100644
--- a/llvm/test/CodeGen/AMDGPU/v_mac.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_mac.ll
@@ -279,6 +279,34 @@ bb:
ret void
}
+; Need to assume denormal handling is needed for dynamic denormal mode
+; GCN-LABEL: {{^}}v_mac_f32_dynamic:
+; GCN: v_mul_f32
+; GCN: v_add_f32
+define float @v_mac_f32_dynamic(float %a, float %b, float %c) "denormal-fp-math-f32"="dynamic,dynamic" {
+ %mul = fmul float %a, %b
+ %mad = fadd float %mul, %c
+ ret float %mad
+}
+
+; GCN-LABEL: {{^}}v_mac_f32_dynamic_daz:
+; GCN: v_mul_f32
+; GCN: v_add_f32
+define float @v_mac_f32_dynamic_daz(float %a, float %b, float %c) "denormal-fp-math-f32"="preserve-sign,dynamic" {
+ %mul = fmul float %a, %b
+ %mad = fadd float %mul, %c
+ ret float %mad
+}
+
+; GCN-LABEL: {{^}}v_mac_f32_dynamic_ftz:
+; GCN: v_mul_f32
+; GCN: v_add_f32
+define float @v_mac_f32_dynamic_ftz(float %a, float %b, float %c) "denormal-fp-math-f32"="dynamic,preserve-sign" {
+ %mul = fmul float %a, %b
+ %mad = fadd float %mul, %c
+ ret float %mad
+}
+
declare i32 @llvm.amdgcn.workitem.id.x() #2
attributes #0 = { nounwind "no-signed-zeros-fp-math"="false" }
More information about the llvm-commits
mailing list