[llvm] r334242 - propagate fast math flags via IR on fma and sub expressions
Michael Berg via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 7 15:49:09 PDT 2018
Author: mcberg2017
Date: Thu Jun 7 15:49:09 2018
New Revision: 334242
URL: http://llvm.org/viewvc/llvm-project?rev=334242&view=rev
Log:
propagate fast math flags via IR on fma and sub expressions
Summary: This change uses fmf subflags to guard fma optimizations as well as unsafe. These changes originated from D46483 and have been simplified via getNode.
Reviewers: spatel, arsenm, hfinkel, javed.absar
Reviewed By: spatel
Subscribers: nemanjai, wdng
Differential Revision: https://reviews.llvm.org/D47388
Modified:
llvm/trunk/include/llvm/CodeGen/SelectionDAG.h
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/trunk/test/CodeGen/PowerPC/fmf-propagation.ll
llvm/trunk/test/CodeGen/X86/sqrt-fastmath-mir.ll
Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAG.h?rev=334242&r1=334241&r2=334242&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/SelectionDAG.h (original)
+++ llvm/trunk/include/llvm/CodeGen/SelectionDAG.h Thu Jun 7 15:49:09 2018
@@ -889,7 +889,8 @@ public:
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
SDValue N2, const SDNodeFlags Flags = SDNodeFlags());
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
- SDValue N2, SDValue N3);
+ SDValue N2, SDValue N3,
+ const SDNodeFlags Flags = SDNodeFlags());
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
SDValue N2, SDValue N3, SDValue N4);
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=334242&r1=334241&r2=334242&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Jun 7 15:49:09 2018
@@ -9626,6 +9626,7 @@ SDValue DAGCombiner::visitFADDForFMAComb
if (!HasFMAD && !HasFMA)
return SDValue();
+ SDNodeFlags Flags = N->getFlags();
bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
Options.UnsafeFPMath || HasFMAD);
// If the addition is not contractable, do not combine.
@@ -9657,14 +9658,14 @@ SDValue DAGCombiner::visitFADDForFMAComb
// fold (fadd (fmul x, y), z) -> (fma x, y, z)
if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
- N0.getOperand(0), N0.getOperand(1), N1);
+ N0.getOperand(0), N0.getOperand(1), N1, Flags);
}
// fold (fadd x, (fmul y, z)) -> (fma y, z, x)
// Note: Commutes FADD operands.
if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
- N1.getOperand(0), N1.getOperand(1), N0);
+ N1.getOperand(0), N1.getOperand(1), N0, Flags);
}
// Look through FP_EXTEND nodes to do more combining.
@@ -9678,7 +9679,7 @@ SDValue DAGCombiner::visitFADDForFMAComb
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N00.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(1)), N1);
+ N00.getOperand(1)), N1, Flags);
}
}
@@ -9692,7 +9693,7 @@ SDValue DAGCombiner::visitFADDForFMAComb
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N10.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N10.getOperand(1)), N0);
+ N10.getOperand(1)), N0, Flags);
}
}
@@ -9710,7 +9711,7 @@ SDValue DAGCombiner::visitFADDForFMAComb
DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(2).getOperand(0),
N0.getOperand(2).getOperand(1),
- N1));
+ N1, Flags), Flags);
}
// fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
@@ -9725,19 +9726,20 @@ SDValue DAGCombiner::visitFADDForFMAComb
DAG.getNode(PreferredFusedOpcode, SL, VT,
N1.getOperand(2).getOperand(0),
N1.getOperand(2).getOperand(1),
- N0));
+ N0, Flags), Flags);
}
// fold (fadd (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y, (fma (fpext u), (fpext v), z))
auto FoldFAddFMAFPExtFMul = [&] (
- SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
+ SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
+ SDNodeFlags Flags) {
return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
- Z));
+ Z, Flags), Flags);
};
if (N0.getOpcode() == PreferredFusedOpcode) {
SDValue N02 = N0.getOperand(2);
@@ -9747,7 +9749,7 @@ SDValue DAGCombiner::visitFADDForFMAComb
TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
N020.getOperand(0), N020.getOperand(1),
- N1);
+ N1, Flags);
}
}
}
@@ -9758,14 +9760,15 @@ SDValue DAGCombiner::visitFADDForFMAComb
// operation into two double-precision operations, which might not be
// interesting for all targets, especially GPUs.
auto FoldFAddFPExtFMAFMul = [&] (
- SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
+ SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
+ SDNodeFlags Flags) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
- Z));
+ Z, Flags), Flags);
};
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
@@ -9775,7 +9778,7 @@ SDValue DAGCombiner::visitFADDForFMAComb
TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
N002.getOperand(0), N002.getOperand(1),
- N1);
+ N1, Flags);
}
}
}
@@ -9790,7 +9793,7 @@ SDValue DAGCombiner::visitFADDForFMAComb
TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
N120.getOperand(0), N120.getOperand(1),
- N0);
+ N0, Flags);
}
}
}
@@ -9808,7 +9811,7 @@ SDValue DAGCombiner::visitFADDForFMAComb
TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
N102.getOperand(0), N102.getOperand(1),
- N0);
+ N0, Flags);
}
}
}
@@ -9837,8 +9840,10 @@ SDValue DAGCombiner::visitFSUBForFMAComb
if (!HasFMAD && !HasFMA)
return SDValue();
+ const SDNodeFlags Flags = N->getFlags();
bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
Options.UnsafeFPMath || HasFMAD);
+
// If the subtraction is not contractable, do not combine.
if (!AllowFusionGlobally && !isContractable(N))
return SDValue();
@@ -9863,7 +9868,7 @@ SDValue DAGCombiner::visitFSUBForFMAComb
if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(ISD::FNEG, SL, VT, N1));
+ DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
}
// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
@@ -9872,7 +9877,7 @@ SDValue DAGCombiner::visitFSUBForFMAComb
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,
N1.getOperand(0)),
- N1.getOperand(1), N0);
+ N1.getOperand(1), N0, Flags);
// fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
@@ -9881,7 +9886,7 @@ SDValue DAGCombiner::visitFSUBForFMAComb
SDValue N01 = N0.getOperand(0).getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
- DAG.getNode(ISD::FNEG, SL, VT, N1));
+ DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
}
// Look through FP_EXTEND nodes to do more combining.
@@ -9897,7 +9902,7 @@ SDValue DAGCombiner::visitFSUBForFMAComb
N00.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N00.getOperand(1)),
- DAG.getNode(ISD::FNEG, SL, VT, N1));
+ DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
}
}
@@ -9914,7 +9919,7 @@ SDValue DAGCombiner::visitFSUBForFMAComb
N10.getOperand(0))),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N10.getOperand(1)),
- N0);
+ N0, Flags);
}
}
@@ -9936,7 +9941,7 @@ SDValue DAGCombiner::visitFSUBForFMAComb
N000.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N000.getOperand(1)),
- N1));
+ N1, Flags));
}
}
}
@@ -9959,7 +9964,7 @@ SDValue DAGCombiner::visitFSUBForFMAComb
N000.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N000.getOperand(1)),
- N1));
+ N1, Flags));
}
}
}
@@ -9979,7 +9984,7 @@ SDValue DAGCombiner::visitFSUBForFMAComb
N0.getOperand(2).getOperand(0),
N0.getOperand(2).getOperand(1),
DAG.getNode(ISD::FNEG, SL, VT,
- N1)));
+ N1), Flags), Flags);
}
// fold (fsub x, (fma y, z, (fmul u, v)))
@@ -9996,8 +10001,7 @@ SDValue DAGCombiner::visitFSUBForFMAComb
N1.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, N20),
-
- N21, N0));
+ N21, N0, Flags), Flags);
}
@@ -10017,7 +10021,7 @@ SDValue DAGCombiner::visitFSUBForFMAComb
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N020.getOperand(1)),
DAG.getNode(ISD::FNEG, SL, VT,
- N1)));
+ N1), Flags), Flags);
}
}
}
@@ -10045,7 +10049,7 @@ SDValue DAGCombiner::visitFSUBForFMAComb
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N002.getOperand(1)),
DAG.getNode(ISD::FNEG, SL, VT,
- N1)));
+ N1), Flags), Flags);
}
}
}
@@ -10068,7 +10072,7 @@ SDValue DAGCombiner::visitFSUBForFMAComb
VT, N1200)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N1201),
- N0));
+ N0, Flags), Flags);
}
}
@@ -10099,7 +10103,7 @@ SDValue DAGCombiner::visitFSUBForFMAComb
VT, N1020)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N1021),
- N0));
+ N0, Flags), Flags);
}
}
}
@@ -10115,6 +10119,7 @@ SDValue DAGCombiner::visitFMULForFMADist
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc SL(N);
+ const SDNodeFlags Flags = N->getFlags();
assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
@@ -10146,52 +10151,54 @@ SDValue DAGCombiner::visitFMULForFMADist
// fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
// fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
- auto FuseFADD = [&](SDValue X, SDValue Y) {
+ auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
if (XC1 && XC1->isExactlyValue(+1.0))
- return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ Y, Flags);
if (XC1 && XC1->isExactlyValue(-1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- DAG.getNode(ISD::FNEG, SL, VT, Y));
+ DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
}
return SDValue();
};
- if (SDValue FMA = FuseFADD(N0, N1))
+ if (SDValue FMA = FuseFADD(N0, N1, Flags))
return FMA;
- if (SDValue FMA = FuseFADD(N1, N0))
+ if (SDValue FMA = FuseFADD(N1, N0, Flags))
return FMA;
// fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
// fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
// fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
// fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
- auto FuseFSUB = [&](SDValue X, SDValue Y) {
+ auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
if (XC0 && XC0->isExactlyValue(+1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
- Y);
+ Y, Flags);
if (XC0 && XC0->isExactlyValue(-1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
- DAG.getNode(ISD::FNEG, SL, VT, Y));
+ DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
if (XC1 && XC1->isExactlyValue(+1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- DAG.getNode(ISD::FNEG, SL, VT, Y));
+ DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
if (XC1 && XC1->isExactlyValue(-1.0))
- return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ Y, Flags);
}
return SDValue();
};
- if (SDValue FMA = FuseFSUB(N0, N1))
+ if (SDValue FMA = FuseFSUB(N0, N1, Flags))
return FMA;
- if (SDValue FMA = FuseFSUB(N1, N0))
+ if (SDValue FMA = FuseFSUB(N1, N0, Flags))
return FMA;
return SDValue();
@@ -10602,6 +10609,9 @@ SDValue DAGCombiner::visitFMA(SDNode *N)
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
+ // FMA nodes have flags that propagate to the created nodes.
+ const SDNodeFlags Flags = N->getFlags();
+
// Constant fold FMA.
if (isa<ConstantFPSDNode>(N0) &&
isa<ConstantFPSDNode>(N1) &&
@@ -10626,11 +10636,6 @@ SDValue DAGCombiner::visitFMA(SDNode *N)
!isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
- // TODO: FMA nodes should have flags that propagate to the created nodes.
- // For now, create a Flags object for use with reassociation math transforms.
- SDNodeFlags Flags;
- Flags.setAllowReassociation(true);
-
if (Options.UnsafeFPMath) {
// (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=334242&r1=334241&r2=334242&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Thu Jun 7 15:49:09 2018
@@ -4859,7 +4859,8 @@ SDValue SelectionDAG::getNode(unsigned O
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
- SDValue N1, SDValue N2, SDValue N3) {
+ SDValue N1, SDValue N2, SDValue N3,
+ const SDNodeFlags Flags) {
// Perform various simplifications.
switch (Opcode) {
case ISD::FMA: {
@@ -4953,10 +4954,13 @@ SDValue SelectionDAG::getNode(unsigned O
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops);
void *IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
+ E->intersectFlagsWith(Flags);
return SDValue(E, 0);
+ }
N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ N->setFlags(Flags);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
} else {
Modified: llvm/trunk/test/CodeGen/PowerPC/fmf-propagation.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/fmf-propagation.ll?rev=334242&r1=334241&r2=334242&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/fmf-propagation.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/fmf-propagation.ll Thu Jun 7 15:49:09 2018
@@ -39,7 +39,7 @@ define float @fmul_fadd_contract1(float
; This shouldn't change anything - the intermediate fmul result is now also flagged.
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract2:'
-; FMFDEBUG: fma {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG: fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_contract2:'
define float @fmul_fadd_contract2(float %x, float %y, float %z) {
@@ -86,7 +86,7 @@ define float @fmul_fadd_reassoc1(float %
; This shouldn't change anything - the intermediate fmul result is now also flagged.
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc2:'
-; FMFDEBUG: fma {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG: fma reassoc {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc2:'
define float @fmul_fadd_reassoc2(float %x, float %y, float %z) {
@@ -109,7 +109,7 @@ define float @fmul_fadd_reassoc2(float %
; The fadd is now fully 'fast'. This implies that contraction is allowed.
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_fast1:'
-; FMFDEBUG: fma {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG: fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_fast1:'
define float @fmul_fadd_fast1(float %x, float %y, float %z) {
@@ -132,7 +132,7 @@ define float @fmul_fadd_fast1(float %x,
; This shouldn't change anything - the intermediate fmul result is now also flagged.
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_fast2:'
-; FMFDEBUG: fma {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG: fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_fast2:'
define float @fmul_fadd_fast2(float %x, float %y, float %z) {
@@ -192,6 +192,7 @@ define float @fmul_fma_reassoc1(float %x
; This shouldn't change anything - the intermediate fmul result is now also flagged.
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
+; FMFDEBUG: fmul reassoc {{t[0-9]+}}
; FMFDEBUG: fma reassoc {{t[0-9]+}}
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'
@@ -232,7 +233,7 @@ define float @fmul_fma_reassoc2(float %x
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'
; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
-; GLOBALDEBUG: fmul reassoc {{t[0-9]+}}
+; GLOBALDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'
define float @fmul_fma_fast1(float %x) {
@@ -264,11 +265,12 @@ define float @fmul_fma_fast1(float %x) {
; This shouldn't change anything - the intermediate fmul result is now also flagged.
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
+; FMFDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
; FMFDEBUG: fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'
; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
-; GLOBALDEBUG: fmul reassoc {{t[0-9]+}}
+; GLOBALDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'
define float @fmul_fma_fast2(float %x) {
Modified: llvm/trunk/test/CodeGen/X86/sqrt-fastmath-mir.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sqrt-fastmath-mir.ll?rev=334242&r1=334241&r2=334242&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sqrt-fastmath-mir.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sqrt-fastmath-mir.ll Thu Jun 7 15:49:09 2018
@@ -33,12 +33,12 @@ define float @rfoo(float %f) #0 {
; CHECK: %1:fr32 = VRSQRTSSr killed %2, %0
; CHECK: %3:fr32 = nnan ninf nsz arcp contract afn reassoc VMULSSrr %0, %1
; CHECK: %4:fr32 = VMOVSSrm
-; CHECK: %5:fr32 = VFMADD213SSr %1, killed %3, %4
+; CHECK: %5:fr32 = nnan ninf nsz arcp contract afn reassoc VFMADD213SSr %1, killed %3, %4
; CHECK: %6:fr32 = VMOVSSrm
; CHECK: %7:fr32 = nnan ninf nsz arcp contract afn reassoc VMULSSrr %1, %6
; CHECK: %8:fr32 = nnan ninf nsz arcp contract afn reassoc VMULSSrr killed %7, killed %5
; CHECK: %9:fr32 = nnan ninf nsz arcp contract afn reassoc VMULSSrr %0, %8
-; CHECK: %10:fr32 = VFMADD213SSr %8, killed %9, %4
+; CHECK: %10:fr32 = nnan ninf nsz arcp contract afn reassoc VFMADD213SSr %8, killed %9, %4
; CHECK: %11:fr32 = nnan ninf nsz arcp contract afn reassoc VMULSSrr %8, %6
; CHECK: %12:fr32 = nnan ninf nsz arcp contract afn reassoc VMULSSrr killed %11, killed %10
; CHECK: $xmm0 = COPY %12
More information about the llvm-commits
mailing list