[llvm] r273803 - [X86] Replace calls to isSingleInputShuffleMask with just checking if V2 is UNDEF. Canonicalization and creation of shuffle vector ensures this is equivalent.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Jun 25 22:10:53 PDT 2016
Author: ctopper
Date: Sun Jun 26 00:10:53 2016
New Revision: 273803
URL: http://llvm.org/viewvc/llvm-project?rev=273803&view=rev
Log:
[X86] Replace calls to isSingleInputShuffleMask with just checking if V2 is UNDEF. Canonicalization and creation of the shuffle vector ensure this is equivalent.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=273803&r1=273802&r2=273803&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Jun 26 00:10:53 2016
@@ -8718,7 +8718,7 @@ static SDValue lowerVectorShuffleAsPermu
"This routine only supports integer vectors.");
assert(VT.is128BitVector() &&
"This routine only works on 128-bit vectors.");
- assert(!isSingleInputShuffleMask(Mask) &&
+ assert(!V2.isUndef() &&
"This routine should only be used when blending two inputs.");
assert(Mask.size() >= 2 && "Single element masks are invalid.");
@@ -8832,7 +8832,7 @@ static SDValue lowerV2F64VectorShuffle(c
assert(V2.getSimpleValueType() == MVT::v2f64 && "Bad operand type!");
assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
- if (isSingleInputShuffleMask(Mask)) {
+ if (V2.isUndef()) {
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
DL, MVT::v2f64, V1, V2, Mask, Subtarget, DAG))
@@ -8910,7 +8910,7 @@ static SDValue lowerV2I64VectorShuffle(c
assert(V2.getSimpleValueType() == MVT::v2i64 && "Bad operand type!");
assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
- if (isSingleInputShuffleMask(Mask)) {
+ if (V2.isUndef()) {
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
DL, MVT::v2i64, V1, V2, Mask, Subtarget, DAG))
@@ -10208,7 +10208,7 @@ static SDValue lowerV16I8VectorShuffle(c
// We special case these as they can be particularly efficiently handled with
// the PACKUSB instruction on x86 and they show up in common patterns of
// rearranging bytes to truncate wide elements.
- bool IsSingleInput = isSingleInputShuffleMask(Mask);
+ bool IsSingleInput = V2.isUndef();
if (int NumEvenDrops = canLowerByDroppingEvenElements(Mask, IsSingleInput)) {
// NumEvenDrops is the power of two stride of the elements. Another way of
// thinking about it is that we need to drop the even elements this many
@@ -10512,9 +10512,8 @@ static SDValue lowerVectorShuffleAsSplit
SDValue V1, SDValue V2,
ArrayRef<int> Mask,
SelectionDAG &DAG) {
- assert(!isSingleInputShuffleMask(Mask) && "This routine must not be used to "
- "lower single-input shuffles as it "
- "could then recurse on itself.");
+ assert(!V2.isUndef() && "This routine must not be used to lower single-input "
+ "shuffles as it could then recurse on itself.");
int Size = Mask.size();
// If this can be modeled as a broadcast of two elements followed by a blend,
@@ -10587,7 +10586,7 @@ static SDValue lowerVectorShuffleAsLaneP
if (!LaneCrossing[0] || !LaneCrossing[1])
return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG);
- assert(isSingleInputShuffleMask(Mask) &&
+ assert(V2.isUndef() &&
"This last part of this routine only works on single input shuffles");
SmallVector<int, 32> FlippedBlendMask(Size);
@@ -10634,7 +10633,7 @@ static SDValue lowerV2X128VectorShuffle(
bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask, {0, 1, 0, 1});
if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5})) {
// With AVX2 we should use VPERMQ/VPERMPD to allow memory folding.
- if (Subtarget.hasAVX2() && isSingleInputShuffleMask(Mask))
+ if (Subtarget.hasAVX2() && V2.isUndef())
return SDValue();
MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(),
@@ -10710,8 +10709,7 @@ static SDValue lowerV2X128VectorShuffle(
static SDValue lowerVectorShuffleByMerging128BitLanes(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
const X86Subtarget &Subtarget, SelectionDAG &DAG) {
- assert(!isSingleInputShuffleMask(Mask) &&
- "This is only useful with multiple inputs.");
+ assert(!V2.isUndef() && "This is only useful with multiple inputs.");
int Size = Mask.size();
int LaneSize = 128 / VT.getScalarSizeInBits();
@@ -11087,7 +11085,7 @@ static SDValue lowerV4F64VectorShuffle(c
Subtarget, DAG))
return V;
- if (isSingleInputShuffleMask(Mask)) {
+ if (V2.isUndef()) {
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
@@ -11194,7 +11192,7 @@ static SDValue lowerV4I64VectorShuffle(c
// use lower latency instructions that will operate on both 128-bit lanes.
SmallVector<int, 2> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v4i64, Mask, RepeatedMask)) {
- if (isSingleInputShuffleMask(Mask)) {
+ if (V2.isUndef()) {
int PSHUFDMask[] = {-1, -1, -1, -1};
for (int i = 0; i < 2; ++i)
if (RepeatedMask[i] >= 0) {
@@ -11211,7 +11209,7 @@ static SDValue lowerV4I64VectorShuffle(c
// AVX2 provides a direct instruction for permuting a single input across
// lanes.
- if (isSingleInputShuffleMask(Mask))
+ if (V2.isUndef())
return DAG.getNode(X86ISD::VPERMI, DL, MVT::v4i64, V1,
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
@@ -11274,7 +11272,7 @@ static SDValue lowerV8F32VectorShuffle(c
if (isShuffleEquivalent(V1, V2, RepeatedMask, {1, 1, 3, 3}))
return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v8f32, V1);
- if (isSingleInputShuffleMask(Mask))
+ if (V2.isUndef())
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f32, V1,
getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
@@ -11296,7 +11294,7 @@ static SDValue lowerV8F32VectorShuffle(c
// If we have a single input shuffle with different shuffle patterns in the
// two 128-bit lanes use the variable mask to VPERMILPS.
- if (isSingleInputShuffleMask(Mask)) {
+ if (V2.isUndef()) {
SDValue VPermMask[8];
for (int i = 0; i < 8; ++i)
VPermMask[i] = Mask[i] < 0 ? DAG.getUNDEF(MVT::i32)
@@ -11365,7 +11363,7 @@ static SDValue lowerV8I32VectorShuffle(c
SmallVector<int, 4> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v8i32, Mask, RepeatedMask)) {
assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
- if (isSingleInputShuffleMask(Mask))
+ if (V2.isUndef())
return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32, V1,
getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
@@ -11393,7 +11391,7 @@ static SDValue lowerV8I32VectorShuffle(c
// If the shuffle patterns aren't repeated but it is a single input, directly
// generate a cross-lane VPERMD instruction.
- if (isSingleInputShuffleMask(Mask)) {
+ if (V2.isUndef()) {
SDValue VPermMask[8];
for (int i = 0; i < 8; ++i)
VPermMask[i] = Mask[i] < 0 ? DAG.getUNDEF(MVT::i32)
@@ -11463,7 +11461,7 @@ static SDValue lowerV16I16VectorShuffle(
DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG))
return V;
- if (isSingleInputShuffleMask(Mask)) {
+ if (V2.isUndef()) {
// There are no generalized cross-lane shuffle operations available on i16
// element types.
if (is128BitLaneCrossingShuffleMask(MVT::v16i16, Mask))
@@ -11546,8 +11544,7 @@ static SDValue lowerV32I8VectorShuffle(c
// There are no generalized cross-lane shuffle operations available on i8
// element types.
- if (isSingleInputShuffleMask(Mask) &&
- is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask))
+ if (V2.isUndef() && is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask))
return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2, Mask,
DAG);
@@ -11691,7 +11688,7 @@ static SDValue lowerVectorShuffleWithPER
MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements());
SDValue MaskNode = getConstVector(Mask, MaskVecVT, DAG, DL, true);
- if (isSingleInputShuffleMask(Mask))
+ if (V2.isUndef())
return DAG.getNode(X86ISD::VPERMV, DL, VT, MaskNode, V1);
return DAG.getNode(X86ISD::VPERMV3, DL, VT, V1, MaskNode, V2);
@@ -11750,7 +11747,7 @@ static SDValue lowerV8I64VectorShuffle(c
// use lower latency instructions that will operate on both 128-bit lanes.
SmallVector<int, 2> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v8i64, Mask, RepeatedMask)) {
- if (isSingleInputShuffleMask(Mask)) {
+ if (V2.isUndef()) {
int PSHUFDMask[] = {-1, -1, -1, -1};
for (int i = 0; i < 2; ++i)
if (RepeatedMask[i] >= 0) {
@@ -11792,7 +11789,7 @@ static SDValue lowerV16I32VectorShuffle(
SmallVector<int, 4> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v16i32, Mask, RepeatedMask)) {
assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
- if (isSingleInputShuffleMask(Mask))
+ if (V2.isUndef())
return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v16i32, V1,
getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
@@ -11841,7 +11838,7 @@ static SDValue lowerV32I16VectorShuffle(
DL, MVT::v32i16, V1, V2, Mask, Subtarget, DAG))
return Rotate;
- if (isSingleInputShuffleMask(Mask)) {
+ if (V2.isUndef()) {
SmallVector<int, 8> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v32i16, Mask, RepeatedMask)) {
// As this is a single-input shuffle, the repeated mask should be
@@ -12064,6 +12061,9 @@ static SDValue lowerVectorShuffle(SDValu
if (NumV2Elements > NumV1Elements)
return DAG.getCommutedVectorShuffle(*SVOp);
+ assert(NumV1Elements > 0 && "No V1 indices");
+ assert((NumV2Elements > 0 || V2IsUndef) && "V2 not undef, but not used");
+
// When the number of V1 and V2 elements are the same, try to minimize the
// number of uses of V2 in the low half of the vector. When that is tied,
// ensure that the sum of indices for V1 is equal to or lower than the sum
More information about the llvm-commits
mailing list