[llvm] r327730 - [X86] Merge ADDSUB/SUBADD detection into single methods that can detect either and indicate what they found.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 16 11:25:59 PDT 2018
Author: ctopper
Date: Fri Mar 16 11:25:59 2018
New Revision: 327730
URL: http://llvm.org/viewvc/llvm-project?rev=327730&view=rev
Log:
[X86] Merge ADDSUB/SUBADD detection into single methods that can detect either and indicate what they found.
Previously, we called the same functions twice, with a bool flag selecting whether to look for ADDSUB or SUBADD. It is more efficient to run the code once, detect either pattern, and use an output flag to report which one was found.
Differential Revision: https://reviews.llvm.org/D44540
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=327730&r1=327729&r2=327730&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Mar 16 11:25:59 2018
@@ -7512,7 +7512,7 @@ static bool isAddSubOrSubAdd(const Build
const X86Subtarget &Subtarget, SelectionDAG &DAG,
SDValue &Opnd0, SDValue &Opnd1,
unsigned &NumExtracts,
- bool matchSubAdd) {
+ bool &IsSubAdd) {
MVT VT = BV->getSimpleValueType(0);
if (!Subtarget.hasSSE3() || !VT.isFloatingPoint())
@@ -7525,26 +7525,20 @@ static bool isAddSubOrSubAdd(const Build
NumExtracts = 0;
// Odd-numbered elements in the input build vector are obtained from
- // adding two integer/float elements.
+ // adding/subtracting two integer/float elements.
// Even-numbered elements in the input build vector are obtained from
- // subtracting two integer/float elements.
- unsigned ExpectedOpcode = matchSubAdd ? ISD::FADD : ISD::FSUB;
- unsigned NextExpectedOpcode = matchSubAdd ? ISD::FSUB : ISD::FADD;
- bool AddFound = false;
- bool SubFound = false;
-
+ // subtracting/adding two integer/float elements.
+ unsigned Opc[2] {0, 0};
for (unsigned i = 0, e = NumElts; i != e; ++i) {
SDValue Op = BV->getOperand(i);
// Skip 'undef' values.
unsigned Opcode = Op.getOpcode();
- if (Opcode == ISD::UNDEF) {
- std::swap(ExpectedOpcode, NextExpectedOpcode);
+ if (Opcode == ISD::UNDEF)
continue;
- }
// Early exit if we found an unexpected opcode.
- if (Opcode != ExpectedOpcode)
+ if (Opcode != ISD::FADD && Opcode != ISD::FSUB)
return false;
SDValue Op0 = Op.getOperand(0);
@@ -7564,11 +7558,11 @@ static bool isAddSubOrSubAdd(const Build
if (I0 != i)
return false;
- // We found a valid add/sub node. Update the information accordingly.
- if (i & 1)
- AddFound = true;
- else
- SubFound = true;
+ // We found a valid add/sub node, make sure its the same opcode as previous
+ // elements for this parity.
+ if (Opc[i % 2] != 0 && Opc[i % 2] != Opcode)
+ return false;
+ Opc[i % 2] = Opcode;
// Update InVec0 and InVec1.
if (InVec0.isUndef()) {
@@ -7585,7 +7579,7 @@ static bool isAddSubOrSubAdd(const Build
// Make sure that operands in input to each add/sub node always
// come from a same pair of vectors.
if (InVec0 != Op0.getOperand(0)) {
- if (ExpectedOpcode == ISD::FSUB)
+ if (Opcode == ISD::FSUB)
return false;
// FADD is commutable. Try to commute the operands
@@ -7598,17 +7592,19 @@ static bool isAddSubOrSubAdd(const Build
if (InVec1 != Op1.getOperand(0))
return false;
- // Update the pair of expected opcodes.
- std::swap(ExpectedOpcode, NextExpectedOpcode);
-
// Increment the number of extractions done.
++NumExtracts;
}
- // Don't try to fold this build_vector into an ADDSUB if the inputs are undef.
- if (!AddFound || !SubFound || InVec0.isUndef() || InVec1.isUndef())
+ // Ensure we have found an opcode for both parities and that they are
+ // different. Don't try to fold this build_vector into an ADDSUB/SUBADD if the
+ // inputs are undef.
+ if (!Opc[0] || !Opc[1] || Opc[0] == Opc[1] ||
+ InVec0.isUndef() || InVec1.isUndef())
return false;
+ IsSubAdd = Opc[0] == ISD::FADD;
+
Opnd0 = InVec0;
Opnd1 = InVec1;
return true;
@@ -7665,15 +7661,17 @@ static bool isFMAddSubOrFMSubAdd(const X
return true;
}
-/// Try to fold a build_vector that performs an 'addsub' or 'fmaddsub' operation
-/// accordingly to X86ISD::ADDSUB or X86ISD::FMADDSUB node.
+/// Try to fold a build_vector that performs an 'addsub' or 'fmaddsub' or
+/// 'fsubadd' operation accordingly to X86ISD::ADDSUB or X86ISD::FMADDSUB or
+/// X86ISD::FMSUBADD node.
static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue Opnd0, Opnd1;
unsigned NumExtracts;
+ bool IsSubAdd;
if (!isAddSubOrSubAdd(BV, Subtarget, DAG, Opnd0, Opnd1, NumExtracts,
- /*matchSubAdd*/false))
+ IsSubAdd))
return SDValue();
MVT VT = BV->getSimpleValueType(0);
@@ -7681,8 +7679,14 @@ static SDValue lowerToAddSubOrFMAddSub(c
// Try to generate X86ISD::FMADDSUB node here.
SDValue Opnd2;
- if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, NumExtracts))
- return DAG.getNode(X86ISD::FMADDSUB, DL, VT, Opnd0, Opnd1, Opnd2);
+ if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, NumExtracts)) {
+ unsigned Opc = IsSubAdd ? X86ISD::FMSUBADD : X86ISD::FMADDSUB;
+ return DAG.getNode(Opc, DL, VT, Opnd0, Opnd1, Opnd2);
+ }
+
+ // We only support ADDSUB.
+ if (IsSubAdd)
+ return SDValue();
// Do not generate X86ISD::ADDSUB node for 512-bit types even though
// the ADDSUB idiom has been successfully recognized. There are no known
@@ -7695,28 +7699,6 @@ static SDValue lowerToAddSubOrFMAddSub(c
return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);
}
-/// Try to fold a build_vector that performs an 'fmsubadd' operation
-/// accordingly to X86ISD::FMSUBADD node.
-static SDValue lowerToFMSubAdd(const BuildVectorSDNode *BV,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
- SDValue Opnd0, Opnd1;
- unsigned NumExtracts;
- if (!isAddSubOrSubAdd(BV, Subtarget, DAG, Opnd0, Opnd1, NumExtracts,
- /*matchSubAdd*/true))
- return SDValue();
-
- MVT VT = BV->getSimpleValueType(0);
- SDLoc DL(BV);
-
- // Try to generate X86ISD::FMSUBADD node here.
- SDValue Opnd2;
- if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, NumExtracts))
- return DAG.getNode(X86ISD::FMSUBADD, DL, VT, Opnd0, Opnd1, Opnd2);
-
- return SDValue();
-}
-
/// Lower BUILD_VECTOR to a horizontal add/sub operation if possible.
static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
const X86Subtarget &Subtarget,
@@ -8253,8 +8235,6 @@ X86TargetLowering::LowerBUILD_VECTOR(SDV
BuildVectorSDNode *BV = cast<BuildVectorSDNode>(Op.getNode());
if (SDValue AddSub = lowerToAddSubOrFMAddSub(BV, Subtarget, DAG))
return AddSub;
- if (SDValue SubAdd = lowerToFMSubAdd(BV, Subtarget, DAG))
- return SubAdd;
if (SDValue HorizontalOp = LowerToHorizontalOp(BV, Subtarget, DAG))
return HorizontalOp;
if (SDValue Broadcast = lowerBuildVectorAsBroadcast(BV, Subtarget, DAG))
@@ -30437,7 +30417,7 @@ static SDValue combineTargetShuffle(SDVa
/// the fact that they're unused.
static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget,
SelectionDAG &DAG, SDValue &Opnd0, SDValue &Opnd1,
- bool matchSubAdd) {
+ bool &IsSubAdd) {
EVT VT = N->getValueType(0);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -30451,23 +30431,13 @@ static bool isAddSubOrSubAdd(SDNode *N,
if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
return false;
- ArrayRef<int> OrigMask = cast<ShuffleVectorSDNode>(N)->getMask();
- SmallVector<int, 16> Mask(OrigMask.begin(), OrigMask.end());
-
SDValue V1 = N->getOperand(0);
SDValue V2 = N->getOperand(1);
- unsigned ExpectedOpcode = matchSubAdd ? ISD::FADD : ISD::FSUB;
- unsigned NextExpectedOpcode = matchSubAdd ? ISD::FSUB : ISD::FADD;
-
- // We require the first shuffle operand to be the ExpectedOpcode node,
- // and the second to be the NextExpectedOpcode node.
- if (V1.getOpcode() == NextExpectedOpcode &&
- V2.getOpcode() == ExpectedOpcode) {
- ShuffleVectorSDNode::commuteMask(Mask);
- std::swap(V1, V2);
- } else if (V1.getOpcode() != ExpectedOpcode ||
- V2.getOpcode() != NextExpectedOpcode)
+ // Make sure we have an FADD and an FSUB.
+ if ((V1.getOpcode() != ISD::FADD && V1.getOpcode() != ISD::FSUB) ||
+ (V2.getOpcode() != ISD::FADD && V2.getOpcode() != ISD::FSUB) ||
+ V1.getOpcode() == V2.getOpcode())
return false;
// If there are other uses of these operations we can't fold them.
@@ -30477,27 +30447,47 @@ static bool isAddSubOrSubAdd(SDNode *N,
// Ensure that both operations have the same operands. Note that we can
// commute the FADD operands.
SDValue LHS, RHS;
- if (ExpectedOpcode == ISD::FSUB) {
+ if (V1.getOpcode() == ISD::FSUB) {
LHS = V1->getOperand(0); RHS = V1->getOperand(1);
if ((V2->getOperand(0) != LHS || V2->getOperand(1) != RHS) &&
(V2->getOperand(0) != RHS || V2->getOperand(1) != LHS))
return false;
} else {
+ assert(V2.getOpcode() == ISD::FSUB && "Unexpected opcode");
LHS = V2->getOperand(0); RHS = V2->getOperand(1);
if ((V1->getOperand(0) != LHS || V1->getOperand(1) != RHS) &&
(V1->getOperand(0) != RHS || V1->getOperand(1) != LHS))
return false;
}
- // We're looking for blends between FADD and FSUB nodes. We insist on these
- // nodes being lined up in a specific expected pattern.
- if (!(isShuffleEquivalent(V1, V2, Mask, {0, 3}) ||
- isShuffleEquivalent(V1, V2, Mask, {0, 5, 2, 7}) ||
- isShuffleEquivalent(V1, V2, Mask, {0, 9, 2, 11, 4, 13, 6, 15}) ||
- isShuffleEquivalent(V1, V2, Mask, {0, 17, 2, 19, 4, 21, 6, 23,
- 8, 25, 10, 27, 12, 29, 14, 31})))
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
+
+ int ParitySrc[2] = {-1, -1};
+ unsigned Size = Mask.size();
+ for (unsigned i = 0; i != Size; ++i) {
+ int M = Mask[i];
+ if (M < 0)
+ continue;
+
+ // Make sure we are using the matching element from the input.
+ if ((M % Size) != i)
+ return false;
+
+ // Make sure we use the same input for all elements of the same parity.
+ int Src = M / Size;
+ if (ParitySrc[i % 2] >= 0 && ParitySrc[i % 2] != Src)
+ return false;
+ ParitySrc[i % 2] = Src;
+ }
+
+ // Make sure each input is used.
+ if (ParitySrc[0] < 0 || ParitySrc[1] < 0 || ParitySrc[0] == ParitySrc[1])
return false;
+ // It's a subadd if the vector in the even parity is an FADD.
+ IsSubAdd = ParitySrc[0] == 0 ? V1->getOpcode() == ISD::FADD
+ : V2->getOpcode() == ISD::FADD;
+
Opnd0 = LHS;
Opnd1 = RHS;
return true;
@@ -30509,7 +30499,8 @@ static SDValue combineShuffleToAddSubOrF
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue Opnd0, Opnd1;
- if (!isAddSubOrSubAdd(N, Subtarget, DAG, Opnd0, Opnd1, /*matchSubAdd*/false))
+ bool IsSubAdd;
+ if (!isAddSubOrSubAdd(N, Subtarget, DAG, Opnd0, Opnd1, IsSubAdd))
return SDValue();
MVT VT = N->getSimpleValueType(0);
@@ -30517,8 +30508,13 @@ static SDValue combineShuffleToAddSubOrF
// Try to generate X86ISD::FMADDSUB node here.
SDValue Opnd2;
- if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, 2))
- return DAG.getNode(X86ISD::FMADDSUB, DL, VT, Opnd0, Opnd1, Opnd2);
+ if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, 2)) {
+ unsigned Opc = IsSubAdd ? X86ISD::FMSUBADD : X86ISD::FMADDSUB;
+ return DAG.getNode(Opc, DL, VT, Opnd0, Opnd1, Opnd2);
+ }
+
+ if (IsSubAdd)
+ return SDValue();
// Do not generate X86ISD::ADDSUB node for 512-bit types even though
// the ADDSUB idiom has been successfully recognized. There are no known
@@ -30529,26 +30525,6 @@ static SDValue combineShuffleToAddSubOrF
return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);
}
-/// \brief Try to combine a shuffle into a target-specific
-/// mul-sub-add node.
-static SDValue combineShuffleToFMSubAdd(SDNode *N,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
- SDValue Opnd0, Opnd1;
- if (!isAddSubOrSubAdd(N, Subtarget, DAG, Opnd0, Opnd1, /*matchSubAdd*/true))
- return SDValue();
-
- MVT VT = N->getSimpleValueType(0);
- SDLoc DL(N);
-
- // Try to generate X86ISD::FMSUBADD node here.
- SDValue Opnd2;
- if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, 2))
- return DAG.getNode(X86ISD::FMSUBADD, DL, VT, Opnd0, Opnd1, Opnd2);
-
- return SDValue();
-}
-
// We are looking for a shuffle where both sources are concatenated with undef
// and have a width that is half of the output's width. AVX2 has VPERMD/Q, so
// if we can express this as a single-source shuffle, that's preferable.
@@ -30640,9 +30616,6 @@ static SDValue combineShuffle(SDNode *N,
if (SDValue AddSub = combineShuffleToAddSubOrFMAddSub(N, Subtarget, DAG))
return AddSub;
- if (SDValue FMSubAdd = combineShuffleToFMSubAdd(N, Subtarget, DAG))
- return FMSubAdd;
-
if (SDValue HAddSub = foldShuffleOfHorizOp(N))
return HAddSub;
}
More information about the llvm-commits mailing list