[PATCH] Flag to enable IEEE-754 friendly FP optimizations
Sergey Dmitrouk via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 14 08:22:30 PDT 2015
Hi Hal,
On Thu, Aug 27, 2015 at 07:47:17PM -0700, Hal Finkel wrote:
> I think you might as well introduce new SDAG node types, FADD_W_CHAIN,
> etc. We're essentially not going to optimize them anyway, so I'm not
> worried about losing existing DAGCombine optimizations. The tricky part
> is that you need to instruction select these nodes into instructions that
> have side effects at the MI level, and this probably requires modifying
> the backends (it needs to have variants of the existing FP instructions
> marked with side effects). But this is probably unavoidable, and luckily,
> most of the changes seem largely rote.
I tried introducing a new instruction, but got many instruction selection
errors, which reminded me of the issues people face when trying to disable
DAGCombine for -O0 builds (it can't be done easily because selection
implicitly depends on DAGCombine).
So I went back to adding a chain to FADD. Please take a look at the
attached sample patch and tell me how bad it looks; it contains changes to
make FADD with a chain work. The side effects part is missing and it seems
to require a new instruction, but the changes in the patch will be necessary
anyway. No need to go into much detail, there are only three targets, and I
basically would like to know how likely something like that is to be
accepted.
Thanks,
Sergey
-------------- next part --------------
include/llvm/CodeGen/SelectionDAG.h | 6 +
include/llvm/CodeGen/SelectionDAGNodes.h | 19 +-
include/llvm/Target/TargetSelectionDAG.td | 2 +-
lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 145 +++++++----
lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 35 ++-
lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 27 +-
lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 18 +-
lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 51 ++++
lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 302 +++++++++++++++++++++--
lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 95 ++++---
lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 5 +-
lib/CodeGen/SelectionDAG/TargetLowering.cpp | 5 +-
lib/Target/X86/X86ISelLowering.cpp | 29 ++-
13 files changed, 610 insertions(+), 129 deletions(-)
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index 8d198ab..7eadcbe 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -683,6 +683,8 @@ public:
const SDNodeFlags *Flags = nullptr);
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2,
SDValue N3);
+ SDValue getNode(unsigned Opcode, SDLoc DL, SDVTList VTs, SDValue N1,
+ SDValue N2, SDValue N3, const SDNodeFlags *Flags);
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2,
SDValue N3, SDValue N4);
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2,
@@ -1266,6 +1268,10 @@ private:
SDValue N1, SDValue N2,
const SDNodeFlags *Flags = nullptr);
+ BinarySDNode *GetBinarySDNode(unsigned Opcode, SDLoc DL, SDVTList VTs,
+ SDValue N1, SDValue N2, SDValue N3,
+ const SDNodeFlags *Flags = nullptr);
+
/// Look up the node specified by ID in CSEMap. If it exists, return it. If
/// not, return the insertion token that will make insertion faster. This
/// overload is for nodes other than Constant or ConstantFP, use the other one
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index dcc43fd..3704bb1 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -118,7 +118,7 @@ public:
SDNode *getNode() const { return Node; }
/// set the SDNode
- void setNode(SDNode *N) { Node = N; }
+ inline void setNode(SDNode *N);
inline SDNode *operator->() const { return Node; }
@@ -875,6 +875,11 @@ public:
// Define inline functions from the SDValue class.
+void SDValue::setNode(SDNode *N) {
+ assert((!N || ResNo < N->getNumValues()) && "Wrong ResNo for new node.");
+ Node = N;
+}
+
inline SDValue::SDValue(SDNode *node, unsigned resno)
: Node(node), ResNo(resno) {
assert((!Node || ResNo < Node->getNumValues()) &&
@@ -1023,13 +1028,18 @@ public:
/// This class is used for two-operand SDNodes. This is solely
/// to allow co-allocation of node operands with the node itself.
class BinarySDNode : public SDNode {
- SDUse Ops[2];
+ SDUse Ops[3];
public:
BinarySDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs,
SDValue X, SDValue Y)
: SDNode(Opc, Order, dl, VTs) {
InitOperands(Ops, X, Y);
}
+ BinarySDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs,
+ SDValue Chain, SDValue X, SDValue Y)
+ : SDNode(Opc, Order, dl, VTs) {
+ InitOperands(Ops, Chain, X, Y);
+ }
};
/// Returns true if the opcode is a binary operation with flags.
@@ -1062,6 +1072,10 @@ public:
BinaryWithFlagsSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs,
SDValue X, SDValue Y, const SDNodeFlags &NodeFlags)
: BinarySDNode(Opc, Order, dl, VTs, X, Y), Flags(NodeFlags) {}
+ BinaryWithFlagsSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs,
+ SDValue Chain, SDValue X, SDValue Y,
+ const SDNodeFlags &NodeFlags)
+ : BinarySDNode(Opc, Order, dl, VTs, Chain, X, Y), Flags(NodeFlags) {}
static bool classof(const SDNode *N) {
return isBinOpWithFlags(N->getOpcode());
}
@@ -1921,6 +1935,7 @@ public:
: MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
SubclassData |= AM << 2;
assert(getAddressingMode() == AM && "MemIndexedMode encoding error!");
+ assert(Operands[0].getValueType() == MVT::Other && "Expected chain!");
InitOperands(Ops, Operands, numOperands);
assert((getOffset().getOpcode() == ISD::UNDEF || isIndexed()) &&
"Only indexed loads and stores have a non-undef offset operand");
diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td
index 7484bb8..25a89ff 100644
--- a/include/llvm/Target/TargetSelectionDAG.td
+++ b/include/llvm/Target/TargetSelectionDAG.td
@@ -404,7 +404,7 @@ def addrspacecast : SDNode<"ISD::ADDRSPACECAST", SDTUnaryOp>;
def extractelt : SDNode<"ISD::EXTRACT_VECTOR_ELT", SDTVecExtract>;
def insertelt : SDNode<"ISD::INSERT_VECTOR_ELT", SDTVecInsert>;
-def fadd : SDNode<"ISD::FADD" , SDTFPBinOp, [SDNPCommutative]>;
+def fadd : SDNode<"ISD::FADD" , SDTFPBinOp, [SDNPHasChain, SDNPCommutative]>;
def fsub : SDNode<"ISD::FSUB" , SDTFPBinOp>;
def fmul : SDNode<"ISD::FMUL" , SDTFPBinOp, [SDNPCommutative]>;
def fdiv : SDNode<"ISD::FDIV" , SDTFPBinOp>;
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b75ac3f..b235da5 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -618,17 +618,17 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
assert(Options.UnsafeFPMath);
// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
- if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
+ if (isNegatibleForFree(Op.getOperand(1), LegalOperations,
DAG.getTargetLoweringInfo(), &Options, Depth+1))
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
- GetNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, Depth+1),
- Op.getOperand(1));
+ GetNegatedExpression(Op.getOperand(1), DAG,
+ LegalOperations, Depth + 1),
+ Op.getOperand(2));
// fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
- GetNegatedExpression(Op.getOperand(1), DAG,
+ GetNegatedExpression(Op.getOperand(2), DAG,
LegalOperations, Depth+1),
- Op.getOperand(0));
+ Op.getOperand(1));
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
assert(Options.UnsafeFPMath);
@@ -1296,8 +1296,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
if (N->getNumValues() == RV.getNode()->getNumValues())
DAG.ReplaceAllUsesWith(N, RV.getNode());
else {
- assert(N->getValueType(0) == RV.getValueType() &&
- N->getNumValues() == 1 && "Type mismatch");
+ assert(N->getValueType(0) == RV.getValueType() && "Type mismatch");
+ assert(N->getNumValues() == 1 && "Type mismatch");
SDValue OpV = RV;
DAG.ReplaceAllUsesWith(N, &OpV);
}
@@ -1412,6 +1412,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
}
SDValue DAGCombiner::combine(SDNode *N) {
+ if (N->getOpcode() == ISD::FADD && N->getOperand(0))
+ return SDValue();
+
SDValue RV = visit(N);
// If nothing happened, try a target-specific DAG combine.
@@ -7405,8 +7408,9 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
/// Try to perform FMA combining on a given FADD node.
SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
+ SDValue Chain = N->getOperand(0);
+ SDValue N0 = N->getOperand(1);
+ SDValue N1 = N->getOperand(2);
EVT VT = N->getValueType(0);
SDLoc SL(N);
@@ -7436,16 +7440,16 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// fold (fadd (fmul x, y), z) -> (fma x, y, z)
if (N0.getOpcode() == ISD::FMUL &&
(Aggressive || N0->hasOneUse())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- N0.getOperand(0), N0.getOperand(1), N1);
+ return DAG.getNode(PreferredFusedOpcode, SL, DAG.getVTList(VT, MVT::Other),
+ Chain, N0.getOperand(0), N0.getOperand(1), N1);
}
// fold (fadd x, (fmul y, z)) -> (fma y, z, x)
// Note: Commutes FADD operands.
if (N1.getOpcode() == ISD::FMUL &&
(Aggressive || N1->hasOneUse())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- N1.getOperand(0), N1.getOperand(1), N0);
+ return DAG.getNode(PreferredFusedOpcode, SL, DAG.getVTList(VT, MVT::Other),
+ Chain, N1.getOperand(0), N1.getOperand(1), N0);
}
// Look through FP_EXTEND nodes to do more combining.
@@ -7454,7 +7458,9 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::FMUL)
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ return DAG.getNode(PreferredFusedOpcode, SL,
+ DAG.getVTList(VT, MVT::Other),
+ Chain,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N00.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
@@ -7466,7 +7472,9 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
if (N10.getOpcode() == ISD::FMUL)
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ return DAG.getNode(PreferredFusedOpcode, SL,
+ DAG.getVTList(VT, MVT::Other),
+ Chain,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N10.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
@@ -7479,9 +7487,13 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
if (N0.getOpcode() == PreferredFusedOpcode &&
N0.getOperand(2).getOpcode() == ISD::FMUL) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ return DAG.getNode(PreferredFusedOpcode, SL,
+ DAG.getVTList(VT, MVT::Other),
+ Chain,
N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(PreferredFusedOpcode, SL,
+ DAG.getVTList(VT, MVT::Other),
+ Chain,
N0.getOperand(2).getOperand(0),
N0.getOperand(2).getOperand(1),
N1));
@@ -7490,9 +7502,13 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
if (N1->getOpcode() == PreferredFusedOpcode &&
N1.getOperand(2).getOpcode() == ISD::FMUL) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ return DAG.getNode(PreferredFusedOpcode, SL,
+ DAG.getVTList(VT, MVT::Other),
+ Chain,
N1.getOperand(0), N1.getOperand(1),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(PreferredFusedOpcode, SL,
+ DAG.getVTList(VT, MVT::Other),
+ Chain,
N1.getOperand(2).getOperand(0),
N1.getOperand(2).getOperand(1),
N0));
@@ -7503,8 +7519,13 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// -> (fma x, y, (fma (fpext u), (fpext v), z))
auto FoldFAddFMAFPExtFMul = [&] (
SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
- DAG.getNode(PreferredFusedOpcode, SL, VT,
+ return DAG.getNode(PreferredFusedOpcode, SL,
+ DAG.getVTList(VT, MVT::Other),
+ Chain,
+ X, Y,
+ DAG.getNode(PreferredFusedOpcode, SL,
+ DAG.getVTList(VT, MVT::Other),
+ Chain,
DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
Z));
@@ -7527,10 +7548,14 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// interesting for all targets, especially GPUs.
auto FoldFAddFPExtFMAFMul = [&] (
SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ return DAG.getNode(PreferredFusedOpcode, SL,
+ DAG.getVTList(VT, MVT::Other),
+ Chain,
DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(PreferredFusedOpcode, SL,
+ DAG.getVTList(VT, MVT::Other),
+ Chain,
DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
Z));
@@ -7862,11 +7887,13 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// fold (fadd c1, c2) -> c1 + c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FADD, DL, VT, N0, N1);
+ return DAG.getNode(ISD::FADD, DL, DAG.getVTList(VT, MVT::Other), DAG.getEntryNode(),
+ N0, N1);
// canonicalize constant to RHS
if (N0CFP && !N1CFP)
- return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
+ return DAG.getNode(ISD::FADD, DL, DAG.getVTList(VT, MVT::Other), DAG.getEntryNode(),
+ N1, N0);
// fold (fadd A, (fneg B)) -> (fsub A, B)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
@@ -7893,8 +7920,12 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
isa<ConstantFPSDNode>(N0.getOperand(1)))
- return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
- DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1));
+ return DAG.getNode(ISD::FADD, DL, DAG.getVTList(VT, MVT::Other),
+ DAG.getEntryNode(),
+ N0.getOperand(0),
+ DAG.getNode(ISD::FADD, DL,
+ DAG.getVTList(VT, MVT::Other), DAG.getEntryNode(),
+ N0.getOperand(1), N1));
// If allowed, fold (fadd (fneg x), x) -> 0.0
if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
@@ -7914,7 +7945,10 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// (fadd (fmul x, c), x) -> (fmul x, c+1)
if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0),
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL,
+ DAG.getVTList(VT, MVT::Other),
+ DAG.getEntryNode(),
+ SDValue(CFP01, 0),
DAG.getConstantFP(1.0, DL, VT));
return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
}
@@ -7923,7 +7957,10 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0),
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL,
+ DAG.getVTList(VT, MVT::Other),
+ DAG.getEntryNode(),
+ SDValue(CFP01, 0),
DAG.getConstantFP(2.0, DL, VT));
return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
}
@@ -7935,7 +7972,10 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// (fadd x, (fmul x, c)) -> (fmul x, c+1)
if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0),
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL,
+ DAG.getVTList(VT, MVT::Other),
+ DAG.getEntryNode(),
+ SDValue(CFP11, 0),
DAG.getConstantFP(1.0, DL, VT));
return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
}
@@ -7944,7 +7984,10 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N0.getOperand(0)) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0),
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL,
+ DAG.getVTList(VT, MVT::Other),
+ DAG.getEntryNode(),
+ SDValue(CFP11, 0),
DAG.getConstantFP(2.0, DL, VT));
return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
}
@@ -8012,8 +8055,8 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// fold (fsub A, (fneg B)) -> (fadd A, B)
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
- return DAG.getNode(ISD::FADD, dl, VT, N0,
- GetNegatedExpression(N1, DAG, LegalOperations));
+ return DAG.getNode(ISD::FADD, dl, DAG.getVTList(VT, MVT::Other), DAG.getEntryNode(),
+ N0, GetNegatedExpression(N1, DAG, LegalOperations));
// If 'unsafe math' is enabled, fold lots of things.
if (Options.UnsafeFPMath) {
@@ -8130,7 +8173,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
// fold (fmul X, 2.0) -> (fadd X, X)
if (N1CFP && N1CFP->isExactlyValue(+2.0))
- return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
+ return DAG.getNode(ISD::FADD, DL, DAG.getVTList(VT, MVT::Other), DAG.getEntryNode(),
+ N0, N0);
// fold (fmul X, -1.0) -> (fneg X)
if (N1CFP && N1CFP->isExactlyValue(-1.0))
@@ -8176,9 +8220,11 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
return N2;
}
if (N0CFP && N0CFP->isExactlyValue(1.0))
- return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
+ return DAG.getNode(ISD::FADD, SDLoc(N), DAG.getVTList(VT, MVT::Other),
+ DAG.getEntryNode(), N1, N2);
if (N1CFP && N1CFP->isExactlyValue(1.0))
- return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
+ return DAG.getNode(ISD::FADD, SDLoc(N), DAG.getVTList(VT, MVT::Other),
+ DAG.getEntryNode(), N0, N2);
// Canonicalize (fma c, x, y) -> (fma x, c, y)
if (N0CFP && !N1CFP)
@@ -8190,7 +8236,8 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
N0 == N2.getOperand(0) &&
N2.getOperand(1).getOpcode() == ISD::ConstantFP) {
return DAG.getNode(ISD::FMUL, dl, VT, N0,
- DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1)));
+ DAG.getNode(ISD::FADD, dl, DAG.getVTList(VT, MVT::Other),
+ DAG.getEntryNode(), N1, N2.getOperand(1)));
}
@@ -8208,28 +8255,32 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
// (fma x, -1, y) -> (fadd (fneg x), y)
if (N1CFP) {
if (N1CFP->isExactlyValue(1.0))
- return DAG.getNode(ISD::FADD, dl, VT, N0, N2);
+ return DAG.getNode(ISD::FADD, dl, DAG.getVTList(VT, MVT::Other),
+ DAG.getEntryNode(), N0, N2);
if (N1CFP->isExactlyValue(-1.0) &&
(!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
AddToWorklist(RHSNeg.getNode());
- return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
+ return DAG.getNode(ISD::FADD, dl, DAG.getVTList(VT, MVT::Other),
+ DAG.getEntryNode(), N2, RHSNeg);
}
}
// (fma x, c, x) -> (fmul x, (c+1))
if (Options.UnsafeFPMath && N1CFP && N0 == N2)
return DAG.getNode(ISD::FMUL, dl, VT, N0,
- DAG.getNode(ISD::FADD, dl, VT,
- N1, DAG.getConstantFP(1.0, dl, VT)));
+ DAG.getNode(ISD::FADD, dl, DAG.getVTList(VT, MVT::Other),
+ DAG.getEntryNode(), N1,
+ DAG.getConstantFP(1.0, dl, VT)));
// (fma x, c, (fneg x)) -> (fmul x, (c-1))
if (Options.UnsafeFPMath && N1CFP &&
N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0)
return DAG.getNode(ISD::FMUL, dl, VT, N0,
- DAG.getNode(ISD::FADD, dl, VT,
- N1, DAG.getConstantFP(-1.0, dl, VT)));
+ DAG.getNode(ISD::FADD, dl, DAG.getVTList(VT, MVT::Other),
+ DAG.getEntryNode(), N1,
+ DAG.getConstantFP(-1.0, dl, VT)));
return SDValue();
@@ -13681,7 +13732,8 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) {
NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
AddToWorklist(NewEst.getNode());
- Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst);
+ Est = DAG.getNode(ISD::FADD, DL, DAG.getVTList(VT, MVT::Other),
+ DAG.getEntryNode(), Est, NewEst);
AddToWorklist(Est.getNode());
}
}
@@ -13751,7 +13803,8 @@ SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est,
Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg);
AddToWorklist(Est.getNode());
- Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree);
+ Est = DAG.getNode(ISD::FADD, DL, DAG.getVTList(VT, MVT::Other), DAG.getEntryNode(),
+ Est, MinusThree);
AddToWorklist(Est.getNode());
Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 0396736..45a390c 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2065,6 +2065,9 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
TargetLowering::ArgListEntry Entry;
for (const SDValue &Op : Node->op_values()) {
EVT ArgVT = Op.getValueType();
+ if (Node->getOpcode() == ISD::FADD && ArgVT == MVT::Other)
+ continue;
+
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Op;
Entry.Ty = ArgTy;
@@ -2083,6 +2086,9 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
// node which is being folded has a non-entry input chain.
SDValue InChain = DAG.getEntryNode();
+ if (Node->getOpcode() == ISD::FADD)
+ InChain = Node->getOperand(0);
+
// isTailCall may be true since the callee does not reference caller stack
// frame. Check if it's in the right position.
SDValue TCChain = InChain;
@@ -2513,7 +2519,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue HiFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, HiOr);
SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt,
TwoP84PlusTwoP52);
- return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub);
+ return DAG.getNode(ISD::FADD, dl, DAG.getVTList(MVT::f64, MVT::Other),
+ DAG.getEntryNode(), LoFlt, HiSub);
}
// Implementation of unsigned i64 to f32.
@@ -2532,7 +2539,9 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, Shr);
SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Or);
- SDValue Slow = DAG.getNode(ISD::FADD, dl, MVT::f32, SignCvt, SignCvt);
+ SDValue Slow = DAG.getNode(ISD::FADD, dl,
+ DAG.getVTList(MVT::f32, MVT::Other),
+ DAG.getEntryNode(), SignCvt, SignCvt);
// TODO: This really should be implemented using a branch rather than a
// select. We happen to get lucky and machinesink does the right
@@ -2572,7 +2581,9 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue Fmul = DAG.getNode(ISD::FMUL, dl, MVT::f64, TwoP32, Fcvt);
SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sel2);
SDValue Fcvt2 = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Lo);
- SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2);
+ SDValue Fadd = DAG.getNode(ISD::FADD, dl,
+ DAG.getVTList(MVT::f64, MVT::Other),
+ DAG.getEntryNode(), Fmul, Fcvt2);
return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd,
DAG.getIntPtrConstant(0, dl));
}
@@ -2624,7 +2635,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
FudgeInReg = Handle.getValue();
}
- return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg);
+ return DAG.getNode(ISD::FADD, dl, DAG.getVTList(DestVT, MVT::Other),
+ DAG.getEntryNode(), Tmp1, FudgeInReg);
}
/// This function is responsible for legalizing a
@@ -3479,6 +3491,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
RTLIB::ADD_F80, RTLIB::ADD_F128,
RTLIB::ADD_PPCF128));
+ Results.push_back(DAG.getEntryNode());
break;
case ISD::FMUL:
Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64,
@@ -3535,7 +3548,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) &&
TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) {
Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1));
- Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1);
+ Tmp1 = DAG.getNode(ISD::FADD, dl, DAG.getVTList(VT, MVT::Other),
+ DAG.getEntryNode(), Node->getOperand(0), Tmp1);
Results.push_back(Tmp1);
} else {
Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
@@ -4277,7 +4291,16 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Tmp1, Tmp2, Node->getOperand(4)));
break;
}
- case ISD::FADD:
+ case ISD::FADD: {
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
+ Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(2));
+ Tmp3 = DAG.getNode(Node->getOpcode(), dl, DAG.getVTList(NVT, MVT::Other),
+ Node->getOperand(0), Tmp1, Tmp2);
+ Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
+ Tmp3, DAG.getIntPtrConstant(0, dl)));
+ Results.push_back(DAG.getEntryNode());
+ break;
+ }
case ISD::FSUB:
case ISD::FMUL:
case ISD::FDIV:
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 3c50a41..e936f0b 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -183,15 +183,19 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) {
SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
- GetSoftenedFloat(N->getOperand(1)) };
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ SDValue Ops[3] = { N->getOperand(0),
+ GetSoftenedFloat(N->getOperand(1)),
+ GetSoftenedFloat(N->getOperand(2)) };
+ SDValue Val, Ch;
+ std::tie(Val, Ch) = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::ADD_F32,
RTLIB::ADD_F64,
RTLIB::ADD_F80,
RTLIB::ADD_F128,
RTLIB::ADD_PPCF128),
- NVT, Ops, 2, false, SDLoc(N)).first;
+ NVT, Ops, 3, false, SDLoc(N));
+ ReplaceValueWith(SDValue(N, 1), Ch);
+ return Val;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
@@ -1908,6 +1912,21 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_UnaryOp(SDNode *N) {
// action. Construct a new SDNode with the promoted float values of the old
// operands.
SDValue DAGTypeLegalizer::PromoteFloatRes_BinOp(SDNode *N) {
+ if (N->getOpcode() == ISD::FADD) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Op0 = GetPromotedFloat(N->getOperand(1));
+ SDValue Op1 = GetPromotedFloat(N->getOperand(2));
+
+ SDValue R = DAG.getNode(N->getOpcode(), SDLoc(N),
+ DAG.getVTList(NVT, MVT::Other), N->getOperand(0),
+ Op0, Op1);
+
+ ReplaceValueWith(SDValue(N, 1), R.getValue(1));
+
+ return R.getValue(0);
+ }
+
EVT VT = N->getValueType(0);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue Op0 = GetPromotedFloat(N->getOperand(0));
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 0f25a61..927cbf6 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -192,10 +192,15 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
// Legalize the operands
SmallVector<SDValue, 8> Ops;
- for (const SDValue &Op : Node->op_values())
- Ops.push_back(LegalizeOp(Op));
+ for (const SDValue &Op : Node->op_values()) {
+ if (Op.getOpcode() == ISD::FADD && Op.getValueType() == MVT::Other)
+ Ops.push_back(Op.getOperand(0));
+ else
+ Ops.push_back(LegalizeOp(Op));
+ }
- SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), 0);
+ SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops),
+ Op.getResNo());
bool HasVectorValue = false;
if (Op.getOpcode() == ISD::LOAD) {
@@ -394,7 +399,8 @@ SDValue VectorLegalizer::Promote(SDValue Op) {
// 2) Extending a vector of floats to a vector of the same number of larger
// floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
MVT VT = Op.getSimpleValueType();
- assert(Op.getNode()->getNumValues() == 1 &&
+ assert(((Op.getOpcode() == ISD::FADD && Op.getNode()->getNumValues() == 2) ||
+ Op.getNode()->getNumValues() == 1) &&
"Can't promote a vector with multiple results!");
MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
SDLoc dl(Op);
@@ -1004,7 +1010,9 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO);
// Add the two halves
- return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO);
+ return DAG.getNode(ISD::FADD, DL,
+ DAG.getVTList(Op.getValueType(), MVT::Other),
+ DAG.getEntryNode(), fHI, fLO);
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 5f9afc9..4da5acb 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -136,6 +136,20 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
+ if (N->getOpcode() == ISD::FADD) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(1));
+ SDValue RHS = GetScalarizedVector(N->getOperand(2));
+ SDValue Ch = DAG.getNode(N->getOpcode(), SDLoc(N),
+ DAG.getVTList(LHS.getValueType(), MVT::Other),
+ N->getOperand(0), LHS, RHS);
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Ch.getValue(1));
+
+ return Ch.getValue(0);
+ }
+
SDValue LHS = GetScalarizedVector(N->getOperand(0));
SDValue RHS = GetScalarizedVector(N->getOperand(1));
return DAG.getNode(N->getOpcode(), SDLoc(N),
@@ -694,6 +708,34 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
SDValue &Hi) {
+ if (N->getOpcode() == ISD::FADD) {
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(1), LHSLo, LHSHi);
+ SDValue RHSLo, RHSHi;
+ GetSplitVector(N->getOperand(2), RHSLo, RHSHi);
+ SDLoc dl(N);
+
+ SDValue Ch = N->getOperand(0);
+
+ Lo = DAG.getNode(N->getOpcode(), dl,
+ DAG.getVTList(LHSLo.getValueType(), MVT::Other),
+ Ch, LHSLo, RHSLo);
+ Hi = DAG.getNode(N->getOpcode(), dl,
+ DAG.getVTList(LHSHi.getValueType(), MVT::Other),
+ Ch, LHSHi, RHSHi);
+
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ Lo = Lo.getValue(0);
+ Hi = Hi.getValue(0);
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Ch);
+ return;
+ }
+
SDValue LHSLo, LHSHi;
GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
SDValue RHSLo, RHSHi;
@@ -2040,6 +2082,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
}
if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) {
+ if (N->getOpcode() == ISD::FADD) {
+ SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(2));
+ SDValue Ch = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(WidenVT, MVT::Other),
+ N->getOperand(0), InOp1, InOp2);
+ ReplaceValueWith(SDValue(N, 1), Ch.getValue(1));
+ return Ch.getValue(0);
+ }
+
// Operation doesn't trap so just widen as normal.
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 0efac31..ce805ab 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -974,6 +974,27 @@ BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL,
return N;
}
+BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL,
+ SDVTList VTs, SDValue N1,
+ SDValue N2, SDValue N3,
+ const SDNodeFlags *Flags) {
+ if (isBinOpWithFlags(Opcode)) {
+ // If no flags were passed in, use a default flags object.
+ SDNodeFlags F;
+ if (Flags == nullptr)
+ Flags = &F;
+
+ BinaryWithFlagsSDNode *FN = new (NodeAllocator) BinaryWithFlagsSDNode(
+ Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2, N3, *Flags);
+
+ return FN;
+ }
+
+ BinarySDNode *N = new (NodeAllocator)
+ BinarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2, N3);
+ return N;
+}
+
SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
void *&InsertPos) {
SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
@@ -3284,6 +3305,8 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT,
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
SDValue N2, const SDNodeFlags *Flags) {
+ assert(Opcode != ISD::FADD && "FADD instruction requires chain.");
+
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
switch (Opcode) {
@@ -3358,22 +3381,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
break;
- case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM:
if (getTarget().Options.UnsafeFPMath) {
- if (Opcode == ISD::FADD) {
- // 0+x --> x
- if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1))
- if (CFP->getValueAPF().isZero())
- return N2;
- // x+0 --> x
- if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
- if (CFP->getValueAPF().isZero())
- return N1;
- } else if (Opcode == ISD::FSUB) {
+ if (Opcode == ISD::FSUB) {
// x-0 --> x
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
if (CFP->getValueAPF().isZero())
@@ -3644,11 +3657,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF();
APFloat::opStatus s;
switch (Opcode) {
- case ISD::FADD:
- s = V1.add(V2, APFloat::rmNearestTiesToEven);
- if (isSafeToOptimizeFPOp(Flags, s, APFloat::opInvalidOp))
- return getConstantFP(V1, DL, VT);
- break;
case ISD::FSUB:
s = V1.subtract(V2, APFloat::rmNearestTiesToEven);
if (isSafeToOptimizeFPOp(Flags, s, APFloat::opInvalidOp))
@@ -3740,7 +3748,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
case ISD::UREM:
case ISD::SREM:
return N2; // fold op(arg1, undef) -> undef
- case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
case ISD::FDIV:
@@ -3791,6 +3798,224 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
return SDValue(N, 0);
}
+SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTs,
+ SDValue Chain, SDValue N1, SDValue N2,
+ const SDNodeFlags *Flags) {
+ EVT VT = VTs.VTs[0];
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
+ switch (Opcode) {
+ default: break;
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ if (getTarget().Options.UnsafeFPMath) {
+ if (Opcode == ISD::FADD) {
+ // 0+x --> x
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1))
+ if (CFP->getValueAPF().isZero())
+ return N2;
+ // x+0 --> x
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
+ if (CFP->getValueAPF().isZero())
+ return N1;
+ } else if (Opcode == ISD::FSUB) {
+ // x-0 --> x
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
+ if (CFP->getValueAPF().isZero())
+ return N1;
+ } else if (Opcode == ISD::FMUL) {
+ ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1);
+ SDValue V = N2;
+
+ // If the first operand isn't the constant, try the second
+ if (!CFP) {
+ CFP = dyn_cast<ConstantFPSDNode>(N2);
+ V = N1;
+ }
+
+ if (CFP) {
+ // 0*x --> 0
+ if (CFP->isZero())
+ return SDValue(CFP,0);
+ // 1*x --> x
+ if (CFP->isExactlyValue(1.0))
+ return V;
+ }
+ }
+ }
+ assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ break;
+ }
+
+ // Perform trivial constant folding.
+ if (SDValue SV =
+ FoldConstantArithmetic(Opcode, DL, VT, N1.getNode(), N2.getNode()))
+ return SV;
+
+ // Canonicalize constant to RHS if commutative.
+ if (N1C && !N2C && isCommutativeBinOp(Opcode)) {
+ std::swap(N1C, N2C);
+ std::swap(N1, N2);
+ }
+
+ // Constant fold FP operations.
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
+ if (N1CFP) {
+ if (!N2CFP && isCommutativeBinOp(Opcode)) {
+ // Canonicalize constant to RHS if commutative.
+ std::swap(N1CFP, N2CFP);
+ std::swap(N1, N2);
+ } else if (N2CFP) {
+ APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF();
+ APFloat::opStatus s;
+ switch (Opcode) {
+ case ISD::FADD:
+ s = V1.add(V2, APFloat::rmNearestTiesToEven);
+ if (isSafeToOptimizeFPOp(Flags, s, APFloat::opInvalidOp))
+ return getConstantFP(V1, DL, VT);
+ break;
+ case ISD::FSUB:
+ s = V1.subtract(V2, APFloat::rmNearestTiesToEven);
+ if (isSafeToOptimizeFPOp(Flags, s, APFloat::opInvalidOp))
+ return getConstantFP(V1, DL, VT);
+ break;
+ case ISD::FMUL:
+ s = V1.multiply(V2, APFloat::rmNearestTiesToEven);
+ if (isSafeToOptimizeFPOp(Flags, s, APFloat::opInvalidOp))
+ return getConstantFP(V1, DL, VT);
+ break;
+ case ISD::FDIV:
+ s = V1.divide(V2, APFloat::rmNearestTiesToEven);
+ if (isSafeToOptimizeFPOp(Flags, s,
+ (APFloat::opStatus)(APFloat::opInvalidOp |
+ APFloat::opDivByZero))) {
+ return getConstantFP(V1, DL, VT);
+ }
+ break;
+ case ISD::FREM:
+ s = V1.mod(V2, APFloat::rmNearestTiesToEven);
+ if (isSafeToOptimizeFPOp(Flags, s,
+ (APFloat::opStatus)(APFloat::opInvalidOp |
+ APFloat::opDivByZero))) {
+ return getConstantFP(V1, DL, VT);
+ }
+ break;
+ case ISD::FCOPYSIGN:
+ V1.copySign(V2);
+ return getConstantFP(V1, DL, VT);
+ default: break;
+ }
+ }
+
+ if (Opcode == ISD::FP_ROUND) {
+ APFloat V = N1CFP->getValueAPF(); // make copy
+ bool ignored;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)V.convert(EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven, &ignored);
+ return getConstantFP(V, DL, VT);
+ }
+ }
+
+ // Canonicalize an UNDEF to the RHS, even over a constant.
+ if (N1.getOpcode() == ISD::UNDEF) {
+ if (isCommutativeBinOp(Opcode)) {
+ std::swap(N1, N2);
+ } else {
+ switch (Opcode) {
+ case ISD::FP_ROUND_INREG:
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::SUB:
+ case ISD::FSUB:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::SRA:
+ return N1; // fold op(undef, arg2) -> undef
+ case ISD::UDIV:
+ case ISD::SDIV:
+ case ISD::UREM:
+ case ISD::SREM:
+ case ISD::SRL:
+ case ISD::SHL:
+ if (!VT.isVector())
+ return getConstant(0, DL, VT); // fold op(undef, arg2) -> 0
+ // For vectors, we can't easily build an all zero vector, just return
+ // the RHS (N2); the LHS (N1) is the undef operand in this branch.
+ return N2;
+ }
+ }
+ }
+
+ // Fold a bunch of operators when the RHS is undef.
+ if (N2.getOpcode() == ISD::UNDEF) {
+ switch (Opcode) {
+ case ISD::XOR:
+ if (N1.getOpcode() == ISD::UNDEF)
+ // Handle undef ^ undef -> 0 special case. This is a common
+ // idiom (misuse).
+ return getConstant(0, DL, VT);
+ // fallthrough
+ case ISD::ADD:
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUB:
+ case ISD::UDIV:
+ case ISD::SDIV:
+ case ISD::UREM:
+ case ISD::SREM:
+ return N2; // fold op(arg1, undef) -> undef
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ if (getTarget().Options.UnsafeFPMath)
+ return N2;
+ break;
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::SRL:
+ case ISD::SHL:
+ if (!VT.isVector())
+ return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0
+ // For vectors, we can't easily build an all zero vector, just return
+ // the LHS.
+ return N1;
+ case ISD::OR:
+ if (!VT.isVector())
+ return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT);
+ // For vectors, we can't easily build an all one vector, just return
+ // the LHS.
+ return N1;
+ case ISD::SRA:
+ return N1;
+ }
+ }
+
+ // Memoize this node if possible.
+ SDValue Ops[] = {N1, N2};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops);
+ AddNodeIDFlags(ID, Opcode, Flags);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))
+ return SDValue(E, 0);
+
+ BinarySDNode *N = GetBinarySDNode(Opcode, DL, VTs, Chain, N1, N2, Flags);
+
+ CSEMap.InsertNode(N, IP);
+
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
bool SelectionDAG::isSafeToOptimizeFPOp(const SDNodeFlags *Flags,
APFloat::opStatus s,
APFloat::opStatus UnsafeOps) const {
@@ -3810,6 +4035,31 @@ bool SelectionDAG::isSafeToOptimizeFPOp(const SDNodeFlags *Flags,
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
SDValue N1, SDValue N2, SDValue N3) {
+
+ if (Opcode == ISD::FADD) {
+ // Memoize the node unless its result type is glue (MVT::Glue).
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ if (VT != MVT::Glue) {
+ SDValue Ops[] = { N1, N2, N3 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))
+ return SDValue(E, 0);
+
+ N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), VTs, N1, N2, N3);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), VTs, N1, N2, N3);
+ }
+
+ InsertNode(N);
+ return SDValue(N, 0);
+ }
+
// Perform various simplifications.
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
switch (Opcode) {
@@ -5438,9 +5688,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList,
DL.getDebugLoc(), VTList, Ops[0],
Ops[1]);
} else if (NumOps == 3) {
- N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(),
- DL.getDebugLoc(), VTList, Ops[0],
- Ops[1], Ops[2]);
+ if (Opcode == ISD::FADD)
+ N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), VTList, Ops[0],
+ Ops[1], Ops[2]);
+ else
+ N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), VTList,
+ Ops[0], Ops[1], Ops[2]);
} else {
N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(),
VTList, Ops);
@@ -6790,7 +7045,8 @@ uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
}
SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
- assert(N->getNumValues() == 1 &&
+ assert(((N->getOpcode() == ISD::FADD && N->getNumValues() == 2) ||
+ N->getNumValues() == 1) &&
"Can't unroll a vector with multiple results!");
EVT VT = N->getValueType(0);
@@ -6828,6 +7084,10 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
default:
Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands));
break;
+ case ISD::FADD:
+ Scalars.push_back(getNode(N->getOpcode(), dl,
+ getVTList(EltVT, MVT::Other), Operands));
+ break;
case ISD::VSELECT:
Scalars.push_back(getNode(ISD::SELECT, dl, EltVT, Operands));
break;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index da8ce3d..1b0be84 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2158,9 +2158,22 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
Flags.setKeepExceptions(FMF.keepExceptions());
Flags.setKeepRounding(FMF.keepRounding());
}
- SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(),
- Op1, Op2, &Flags);
- setValue(&I, BinNodeValue);
+ if (OpCode == ISD::FADD) {
+ SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(),
+ DAG.getVTList(Op1.getValueType(),
+ MVT::Other),
+ getRoot(), Op1, Op2, &Flags);
+ setValue(&I, BinNodeValue.getValue(0));
+
+ if (BinNodeValue->getNumValues() == 2) {
+ DAG.setRoot(BinNodeValue.getValue(1));
+ }
+ } else {
+ SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(),
+ Op1.getValueType(),
+ Op1, Op2, &Flags);
+ setValue(&I, BinNodeValue);
+ }
}
void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
@@ -3526,10 +3539,13 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl,
// error 0.0144103317, which is 6 bits
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0x3e814304, dl));
- SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, DAG.getVTList(MVT::f32, MVT::Other),
+ DAG.getEntryNode(), t2,
getF32Constant(DAG, 0x3f3c50c8, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl,
+ DAG.getVTList(MVT::f32, MVT::Other),
+ DAG.getEntryNode(), t4,
getF32Constant(DAG, 0x3f7f5e7e, dl));
} else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
@@ -3542,13 +3558,17 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl,
// error 0.000107046256, which is 13 to 14 bits
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0x3da235e3, dl));
- SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, DAG.getVTList(MVT::f32, MVT::Other),
+ DAG.getEntryNode(), t2,
getF32Constant(DAG, 0x3e65b8f3, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, DAG.getVTList(MVT::f32, MVT::Other),
+ DAG.getEntryNode(), t4,
getF32Constant(DAG, 0x3f324b07, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl,
+ DAG.getVTList(MVT::f32, MVT::Other),
+ DAG.getEntryNode(), t6,
getF32Constant(DAG, 0x3f7ff8fd, dl));
} else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
@@ -3563,22 +3583,22 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl,
// error 2.47208000*10^(-7), which is better than 18 bits
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0x3924b03e, dl));
- SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, DAG.getVTList(MVT::f32, MVT::Other), DAG.getEntryNode(), t2,
getF32Constant(DAG, 0x3ab24b87, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, DAG.getVTList(MVT::f32, MVT::Other), DAG.getEntryNode(), t4,
getF32Constant(DAG, 0x3c1d8c17, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, DAG.getVTList(MVT::f32, MVT::Other), DAG.getEntryNode(), t6,
getF32Constant(DAG, 0x3d634a1d, dl));
SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
- SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, DAG.getVTList(MVT::f32, MVT::Other), DAG.getEntryNode(), t8,
getF32Constant(DAG, 0x3e75fe14, dl));
SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
- SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+ SDValue t11 = DAG.getNode(ISD::FADD, dl, DAG.getVTList(MVT::f32, MVT::Other), DAG.getEntryNode(), t10,
getF32Constant(DAG, 0x3f317234, dl));
SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
- TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, DAG.getVTList(MVT::f32, MVT::Other), DAG.getEntryNode(), t12,
getF32Constant(DAG, 0x3f800000, dl));
}
@@ -3637,7 +3657,7 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG,
// error 0.0034276066, which is better than 8 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbe74c456, dl));
- SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, DAG.getVTList(MVT::f32, MVT::Other), DAG.getEntryNode(), t0,
getF32Constant(DAG, 0x3fb3a2b1, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
@@ -3654,13 +3674,15 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG,
// error 0.000061011436, which is 14 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbd67b6d6, dl));
- SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, DAG.getVTList(MVT::f32, MVT::Other),
+ DAG.getEntryNode(), t0,
getF32Constant(DAG, 0x3ee4f4b8, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3fbc278b, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, DAG.getVTList(MVT::f32, MVT::Other),
+ DAG.getEntryNode(), t4,
getF32Constant(DAG, 0x40348e95, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
@@ -3679,26 +3701,27 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG,
// error 0.0000023660568, which is better than 18 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbc91e5ac, dl));
- SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, DAG.getVTList(MVT::f32, MVT::Other),
+ DAG.getEntryNode(), t0,
getF32Constant(DAG, 0x3e4350aa, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f60d3e3, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, DAG.getVTList(MVT::f32, MVT::Other), DAG.getEntryNode(), t4,
getF32Constant(DAG, 0x4011cdf0, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
getF32Constant(DAG, 0x406cfd1c, dl));
SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
- SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, DAG.getVTList(MVT::f32, MVT::Other), DAG.getEntryNode(), t8,
getF32Constant(DAG, 0x408797cb, dl));
SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
getF32Constant(DAG, 0x4006dcab, dl));
}
- return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa);
+ return DAG.getNode(ISD::FADD, dl, DAG.getVTList(MVT::f32, MVT::Other), DAG.getEntryNode(), LogOfExponent, LogOfMantissa);
}
// No special expansion.
@@ -3731,7 +3754,7 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
// error 0.0049451742, which is more than 7 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbeb08fe0, dl));
- SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, DAG.getVTList(MVT::f32, MVT::Other), DAG.getEntryNode(), t0,
getF32Constant(DAG, 0x40019463, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
@@ -3748,13 +3771,13 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
// error 0.0000876136000, which is better than 13 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbda7262e, dl));
- SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, DAG.getVTList(MVT::f32, MVT::Other), DAG.getEntryNode(), t0,
getF32Constant(DAG, 0x3f25280b, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
getF32Constant(DAG, 0x4007b923, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, DAG.getVTList(MVT::f32, MVT::Other), DAG.getEntryNode(), t4,
getF32Constant(DAG, 0x40823e2f, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
@@ -3774,26 +3797,26 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
// error 0.0000018516, which is better than 18 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbcd2769e, dl));
- SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, DAG.getVTList(MVT::f32, MVT::Other), DAG.getEntryNode(), t0,
getF32Constant(DAG, 0x3e8ce0b9, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3fa22ae7, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, DAG.getVTList(MVT::f32, MVT::Other), DAG.getEntryNode(), t4,
getF32Constant(DAG, 0x40525723, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
getF32Constant(DAG, 0x40aaf200, dl));
SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
- SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, DAG.getVTList(MVT::f32, MVT::Other), DAG.getEntryNode(), t8,
getF32Constant(DAG, 0x40c39dad, dl));
SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
getF32Constant(DAG, 0x4042902c, dl));
}
- return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa);
+ return DAG.getNode(ISD::FADD, dl, DAG.getVTList(MVT::f32, MVT::Other), DAG.getEntryNode(), LogOfExponent, Log2ofMantissa);
}
// No special expansion.
@@ -3828,7 +3851,7 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG,
// error 0.0014886165, which is 6 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbdd49a13, dl));
- SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, DAG.getVTList(MVT::f32, MVT::Other), DAG.getEntryNode(), t0,
getF32Constant(DAG, 0x3f1c0789, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
@@ -3847,7 +3870,7 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG,
SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3ea21fb2, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
- SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, DAG.getVTList(MVT::f32, MVT::Other), DAG.getEntryNode(), t2,
getF32Constant(DAG, 0x3f6ae232, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
@@ -3868,20 +3891,20 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG,
SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3e00685a, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
- SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, DAG.getVTList(MVT::f32, MVT::Other), DAG.getEntryNode(), t2,
getF32Constant(DAG, 0x3efb6798, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f88d192, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, DAG.getVTList(MVT::f32, MVT::Other), DAG.getEntryNode(), t6,
getF32Constant(DAG, 0x3fc4316c, dl));
SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
getF32Constant(DAG, 0x3f57ce70, dl));
}
- return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa);
+ return DAG.getNode(ISD::FADD, dl, DAG.getVTList(MVT::f32, MVT::Other), DAG.getEntryNode(), LogOfExponent, Log10ofMantissa);
}
// No special expansion.
@@ -4615,8 +4638,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)));
SDValue Add = DAG.getNode(ISD::FADD, sdl,
- getValue(I.getArgOperand(0)).getValueType(),
- Mul,
+ DAG.getVTList(
+ getValue(I.getArgOperand(0)).getValueType(),
+ MVT::Other),
+ DAG.getEntryNode(), Mul,
getValue(I.getArgOperand(2)));
setValue(&I, Add);
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index c3dab9c..a1fce47 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -3158,7 +3158,10 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
// If this has chain/glue inputs, add them.
if (EmitNodeInfo & OPFL_Chain)
- Ops.push_back(InputChain);
+ if (InputChain.getNode())
+ Ops.push_back(InputChain);
+ else
+ Ops.push_back(CurDAG->getEntryNode());
if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != nullptr)
Ops.push_back(InputGlue);
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index bacf8be..9a5961d 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -93,7 +93,7 @@ TargetLowering::makeLibCall(SelectionDAG &DAG,
Args.reserve(NumOps);
TargetLowering::ArgListEntry Entry;
- for (unsigned i = 0; i != NumOps; ++i) {
+ for (unsigned i = NumOps && Ops[0].getValueType() == MVT::Other; i != NumOps; ++i) {
Entry.Node = Ops[i];
Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
Entry.isSExt = shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned);
@@ -108,7 +108,8 @@ TargetLowering::makeLibCall(SelectionDAG &DAG,
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
TargetLowering::CallLoweringInfo CLI(DAG);
bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned);
- CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
+ CLI.setDebugLoc(dl)
+ .setChain(NumOps && Ops[0].getValueType() == MVT::Other ? Ops[0] : DAG.getEntryNode())
.setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
.setNoReturn(doesNotReturn).setDiscardResult(!isReturnValueUsed)
.setSExtResult(signExtend).setZExtResult(!signExtend);
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index a17d0da..343d49c 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -12007,8 +12007,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
SDValue S2F = DAG.getBitcast(MVT::v4i32, Sub);
SDValue Shuffle = getTargetShuffleNode(X86ISD::PSHUFD, dl, MVT::v4i32,
S2F, 0x4E, DAG);
- Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64,
- DAG.getBitcast(MVT::v2f64, Shuffle), Sub);
+ Result = DAG.getNode(ISD::FADD, dl, DAG.getVTList(MVT::v2f64, MVT::Other),
+ DAG.getEntryNode(), DAG.getBitcast(MVT::v2f64, Shuffle), Sub);
}
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
@@ -12159,10 +12159,12 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
// float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f);
SDValue HighBitcast = DAG.getBitcast(VecFloatVT, High);
SDValue FHigh =
- DAG.getNode(ISD::FADD, DL, VecFloatVT, HighBitcast, VecCstFAdd);
+ DAG.getNode(ISD::FADD, DL, DAG.getVTList(VecFloatVT, MVT::Other),
+ DAG.getEntryNode(), HighBitcast, VecCstFAdd);
// return (float4) lo + fhi;
SDValue LowBitcast = DAG.getBitcast(VecFloatVT, Low);
- return DAG.getNode(ISD::FADD, DL, VecFloatVT, LowBitcast, FHigh);
+ return DAG.getNode(ISD::FADD, DL, DAG.getVTList(VecFloatVT, MVT::Other),
+ DAG.getEntryNode(), LowBitcast, FHigh);
}
SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
@@ -12277,7 +12279,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
FudgePtr, MachinePointerInfo::getConstantPool(),
MVT::f32, false, false, false, 4);
// Extend everything to 80 bits to force it to be done on x87.
- SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
+ SDValue Add = DAG.getNode(ISD::FADD, dl, DAG.getVTList(MVT::f80, MVT::Other),
+ DAG.getEntryNode(), Fild, Fudge);
return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add,
DAG.getIntPtrConstant(0, dl));
}
@@ -15484,6 +15487,12 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
Mask, PassThru, Subtarget, DAG);
}
}
+ if (IntrData->Opc0 == ISD::FADD)
+ // XXX: the chain passed here is only a placeholder (the DAG entry node).
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl,
+ DAG.getVTList(VT, MVT::Other),
+ DAG.getEntryNode(), Src1, Src2),
+ Mask, PassThru, Subtarget, DAG);
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
Src1,Src2),
Mask, PassThru, Subtarget, DAG);
@@ -15501,6 +15510,13 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
Rnd = Op.getOperand(5);
else
Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
+ if (IntrData->Opc0 == ISD::FADD)
+ // XXX: the chain passed here is only a placeholder (the DAG entry node).
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl,
+ DAG.getVTList(VT, MVT::Other),
+ DAG.getEntryNode(), Src1, Src2,
+ Rnd),
+ Mask, PassThru, Subtarget, DAG);
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
Src1, Src2, Rnd),
Mask, PassThru, Subtarget, DAG);
@@ -24940,7 +24956,8 @@ static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG,
else
Opcode = (!NegC) ? X86ISD::FNMADD : X86ISD::FNMSUB;
- return DAG.getNode(Opcode, dl, VT, A, B, C);
+ return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT::Other),
+ N->getOperand(0), A, B, C);
}
static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
More information about the llvm-commits mailing list