[llvm] r343989 - [x86] make horizontal binop matching clearer; NFCI
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 8 11:08:02 PDT 2018
Author: spatel
Date: Mon Oct 8 11:08:02 2018
New Revision: 343989
URL: http://llvm.org/viewvc/llvm-project?rev=343989&view=rev
Log:
[x86] make horizontal binop matching clearer; NFCI
The instructions are complicated, so this code will
probably never be very obvious, but hopefully this
makes it better.
As shown in PR39195:
https://bugs.llvm.org/show_bug.cgi?id=39195
...we need to improve the matching so we don't miss cases
where we're h-opping on 1 source vector, and that
should be a small patch after this rearranging.
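For readers who don't have the instruction semantics memorized, here is a
minimal standalone sketch (illustrative, not part of the patch) of what the
matcher is looking for: SSE3's HADDPS adds adjacent element pairs within each
source operand and packs the sums. It assumes an SSE3-capable target (e.g.
compile with -msse3).

// haddps_demo.cpp: demonstrates the horizontal-add result that
// isHorizontalBinOp recognizes from shuffles + a vertical binop.
#include <pmmintrin.h>
#include <cstdio>

int main() {
  __m128 A = _mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f); // <a0, a1, a2, a3>
  __m128 B = _mm_setr_ps(5.0f, 6.0f, 7.0f, 8.0f); // <b0, b1, b2, b3>
  // HADDPS A, B == <a0+a1, a2+a3, b0+b1, b2+b3>
  __m128 H = _mm_hadd_ps(A, B);
  float Out[4];
  _mm_storeu_ps(Out, H);
  printf("%g %g %g %g\n", Out[0], Out[1], Out[2], Out[3]); // 3 7 11 15
  return 0;
}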
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=343989&r1=343988&r2=343989&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Oct 8 11:08:02 2018
@@ -36927,10 +36927,12 @@ static SDValue combineStore(SDNode *N, S
/// In short, LHS and RHS are inspected to see if LHS op RHS is of the form
/// A horizontal-op B, for some already available A and B, and if so then LHS is
/// set to A, RHS to B, and the routine returns 'true'.
-/// Note that the binary operation should have the property that if one of the
-/// operands is UNDEF then the result is UNDEF.
static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
- // Look for the following pattern: if
+ // If either operand is undef, bail out. The binop should be simplified.
+ if (LHS.isUndef() || RHS.isUndef())
+ return false;
+
+ // Look for the following pattern:
// A = < float a0, float a1, float a2, float a3 >
// B = < float b0, float b1, float b2, float b3 >
// and
@@ -36945,25 +36947,15 @@ static bool isHorizontalBinOp(SDValue &L
return false;
MVT VT = LHS.getSimpleValueType();
-
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for horizontal add/sub");
- // Handle 128 and 256-bit vector lengths. AVX defines horizontal add/sub to
- // operate independently on 128-bit lanes.
- unsigned NumElts = VT.getVectorNumElements();
- unsigned NumLanes = VT.getSizeInBits()/128;
- unsigned NumLaneElts = NumElts / NumLanes;
- assert((NumLaneElts % 2 == 0) &&
- "Vector type should have an even number of elements in each lane");
- unsigned HalfLaneElts = NumLaneElts/2;
-
// View LHS in the form
// LHS = VECTOR_SHUFFLE A, B, LMask
- // If LHS is not a shuffle then pretend it is the shuffle
+ // If LHS is not a shuffle, then pretend it is the identity shuffle:
// LHS = VECTOR_SHUFFLE LHS, undef, <0, 1, ..., N-1>
- // NOTE: in what follows a default initialized SDValue represents an UNDEF of
- // type VT.
+ // NOTE: A default initialized SDValue represents an UNDEF of type VT.
+ unsigned NumElts = VT.getVectorNumElements();
SDValue A, B;
SmallVector<int, 16> LMask(NumElts);
if (LHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
@@ -36974,8 +36966,7 @@ static bool isHorizontalBinOp(SDValue &L
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(LHS.getNode())->getMask();
std::copy(Mask.begin(), Mask.end(), LMask.begin());
} else {
- if (!LHS.isUndef())
- A = LHS;
+ A = LHS;
for (unsigned i = 0; i != NumElts; ++i)
LMask[i] = i;
}
@@ -36992,43 +36983,48 @@ static bool isHorizontalBinOp(SDValue &L
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(RHS.getNode())->getMask();
std::copy(Mask.begin(), Mask.end(), RMask.begin());
} else {
- if (!RHS.isUndef())
- C = RHS;
+ C = RHS;
for (unsigned i = 0; i != NumElts; ++i)
RMask[i] = i;
}
+ // If A and B occur in reverse order in RHS, then canonicalize by commuting
+ // RHS operands and shuffle mask.
+ if (A != C) {
+ std::swap(C, D);
+ ShuffleVectorSDNode::commuteMask(RMask);
+ }
// Check that the shuffles are both shuffling the same vectors.
- if (!(A == C && B == D) && !(A == D && B == C))
+ if (!(A == C && B == D))
return false;
- // If everything is UNDEF then bail out: it would be better to fold to UNDEF.
- if (!A.getNode() && !B.getNode())
- return false;
-
- // If A and B occur in reverse order in RHS, then "swap" them (which means
- // rewriting the mask).
- if (A != C)
- ShuffleVectorSDNode::commuteMask(RMask);
-
- // At this point LHS and RHS are equivalent to
- // LHS = VECTOR_SHUFFLE A, B, LMask
- // RHS = VECTOR_SHUFFLE A, B, RMask
+ // LHS and RHS are now:
+ // LHS = shuffle A, B, LMask
+ // RHS = shuffle A, B, RMask
// Check that the masks correspond to performing a horizontal operation.
- for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
- for (unsigned i = 0; i != NumLaneElts; ++i) {
- int LIdx = LMask[i+l], RIdx = RMask[i+l];
-
- // Ignore any UNDEF components.
+ // AVX defines horizontal add/sub to operate independently on 128-bit lanes,
+ // so we just repeat the inner loop if this is a 256-bit op.
+ unsigned Num128BitChunks = VT.getSizeInBits() / 128;
+ unsigned NumEltsPer128BitChunk = NumElts / Num128BitChunks;
+ assert((NumEltsPer128BitChunk % 2 == 0) &&
+ "Vector type should have an even number of elements in each lane");
+ for (unsigned j = 0; j != NumElts; j += NumEltsPer128BitChunk) {
+ for (unsigned i = 0; i != NumEltsPer128BitChunk; ++i) {
+ // Ignore undefined components.
+ int LIdx = LMask[i + j], RIdx = RMask[i + j];
if (LIdx < 0 || RIdx < 0 ||
(!A.getNode() && (LIdx < (int)NumElts || RIdx < (int)NumElts)) ||
(!B.getNode() && (LIdx >= (int)NumElts || RIdx >= (int)NumElts)))
continue;
- // Check that successive elements are being operated on. If not, this is
+ // The low half of the 128-bit result must choose from A.
+ // The high half of the 128-bit result must choose from B.
+ unsigned NumEltsPer64BitChunk = NumEltsPer128BitChunk / 2;
+ unsigned Src = i >= NumEltsPer64BitChunk;
+
+ // Check that successive elements are being operated on. If not, this is
// not a horizontal operation.
- unsigned Src = (i/HalfLaneElts); // each lane is split between srcs
- int Index = 2*(i%HalfLaneElts) + NumElts*Src + l;
+ int Index = 2 * (i % NumEltsPer64BitChunk) + NumElts * Src + j;
if (!(LIdx == Index && RIdx == Index + 1) &&
!(IsCommutative && LIdx == Index + 1 && RIdx == Index))
return false;
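To make the rewritten index math concrete, here is a small standalone sketch
(illustrative, not part of the patch) that mirrors the loop above and prints
the only (LIdx, RIdx) pairs the matcher accepts for a 256-bit horizontal op.
The variable names follow the patch; the v8f32 choice and the main() harness
are just for the demo.

// hop_mask_demo.cpp: prints the expected shuffle-mask indices for a
// 256-bit, 8 x f32 horizontal op, using the same index formula as the
// rewritten loop in isHorizontalBinOp.
#include <cstdio>

int main() {
  const unsigned NumElts = 8;                                       // v8f32
  const unsigned Num128BitChunks = 2;                               // 256 / 128
  const unsigned NumEltsPer128BitChunk = NumElts / Num128BitChunks; // 4
  const unsigned NumEltsPer64BitChunk = NumEltsPer128BitChunk / 2;  // 2

  for (unsigned j = 0; j != NumElts; j += NumEltsPer128BitChunk) {
    for (unsigned i = 0; i != NumEltsPer128BitChunk; ++i) {
      // Low half of each 128-bit chunk reads from A, high half from B.
      unsigned Src = i >= NumEltsPer64BitChunk;
      int Index = 2 * (i % NumEltsPer64BitChunk) + NumElts * Src + j;
      // LMask must select Index and RMask must select Index + 1 (or the
      // commuted pair when the op is commutative).
      printf("result elt %u: LIdx = %d, RIdx = %d (source %c)\n",
             i + j, Index, Index + 1, Src ? 'B' : 'A');
    }
  }
  return 0;
}

This matches the lane-wise behavior of VHADDPS: within each 128-bit lane, the
low 64 bits come from adjacent pairs of A and the high 64 bits from adjacent
pairs of B.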