[llvm] 66e7dce - Revert "[X86][SSE] Shuffle combine blends to OR(X, Y) if the relevant elements are known zero."
Mitch Phillips via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 3 13:51:07 PDT 2020
Author: Mitch Phillips
Date: 2020-08-03T13:48:30-07:00
New Revision: 66e7dce714fabd3ddb1aed635e4b826476d4f1a2
URL: https://github.com/llvm/llvm-project/commit/66e7dce714fabd3ddb1aed635e4b826476d4f1a2
DIFF: https://github.com/llvm/llvm-project/commit/66e7dce714fabd3ddb1aed635e4b826476d4f1a2.diff
LOG: Revert "[X86][SSE] Shuffle combine blends to OR(X,Y) if the relevant elements are known zero."
This reverts commit 219f32f4b68679563443cdaae7b8174c9976409a.
Commit contains unsigned comparisons that break bots that build with
-Wsign-compare.
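
For context, -Wsign-compare (typically promoted to an error on bots that build with -Werror) fires whenever a signed and an unsigned integer are compared directly. The exact offending expression is not quoted in this mail; the following standalone sketch (not LLVM code; the function name is made up) shows the general pattern that trips the warning:

// sign_compare_demo.cpp -- minimal sketch of the -Wsign-compare pattern.
// Compile with: clang++ -Wsign-compare -c sign_compare_demo.cpp
#include <cstddef>
#include <vector>

// Hypothetical helper: find the first negative (sentinel) mask entry.
int firstSentinel(const std::vector<int> &Mask) {
  std::size_t MaskSize = Mask.size();
  for (int i = 0; i != MaskSize; ++i) // warning: comparison of integers of
    if (Mask[i] < 0)                  // different signs: 'int' and 'size_t'
      return i;
  return -1;
}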
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/insertelement-ones.ll
llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll
llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b2bfcc2698f4..e9bb50aacec0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7401,8 +7401,8 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
// Handle OR(SHUFFLE,SHUFFLE) case where one source is zero and the other
// is a valid shuffle index.
- SDValue N0 = peekThroughBitcasts(N.getOperand(0));
- SDValue N1 = peekThroughBitcasts(N.getOperand(1));
+ SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(0));
+ SDValue N1 = peekThroughOneUseBitcasts(N.getOperand(1));
if (!N0.getValueType().isVector() || !N1.getValueType().isVector())
return false;
SmallVector<int, 64> SrcMask0, SrcMask1;
@@ -7413,24 +7413,34 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
true))
return false;
+ // Shuffle inputs must be the same size as the result.
+ if (llvm::any_of(SrcInputs0, [VT](SDValue Op) {
+ return VT.getSizeInBits() != Op.getValueSizeInBits();
+ }))
+ return false;
+ if (llvm::any_of(SrcInputs1, [VT](SDValue Op) {
+ return VT.getSizeInBits() != Op.getValueSizeInBits();
+ }))
+ return false;
+
size_t MaskSize = std::max(SrcMask0.size(), SrcMask1.size());
SmallVector<int, 64> Mask0, Mask1;
narrowShuffleMaskElts(MaskSize / SrcMask0.size(), SrcMask0, Mask0);
narrowShuffleMaskElts(MaskSize / SrcMask1.size(), SrcMask1, Mask1);
- for (int i = 0; i != (int)MaskSize; ++i) {
+ for (size_t i = 0; i != MaskSize; ++i) {
if (Mask0[i] == SM_SentinelUndef && Mask1[i] == SM_SentinelUndef)
Mask.push_back(SM_SentinelUndef);
else if (Mask0[i] == SM_SentinelZero && Mask1[i] == SM_SentinelZero)
Mask.push_back(SM_SentinelZero);
else if (Mask1[i] == SM_SentinelZero)
- Mask.push_back(i);
+ Mask.push_back(Mask0[i]);
else if (Mask0[i] == SM_SentinelZero)
- Mask.push_back(i + MaskSize);
+ Mask.push_back(Mask1[i] + (int)(MaskSize * SrcInputs0.size()));
else
return false;
}
- Ops.push_back(N0);
- Ops.push_back(N1);
+ Ops.append(SrcInputs0.begin(), SrcInputs0.end());
+ Ops.append(SrcInputs1.begin(), SrcInputs1.end());
return true;
}
case ISD::INSERT_SUBVECTOR: {
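
For reference, the '+' lines in the hunk above restore the original OR(SHUFFLE,SHUFFLE) handling in getFauxShuffleMask: the two source masks are merged lane by lane, and the merge only succeeds when at least one side of every lane is known zero. A standalone sketch of that merge loop (assumed names and sentinel values, simplified from the LLVM version):

#include <cstddef>
#include <vector>

constexpr int SentinelUndef = -1; // lane value is undefined
constexpr int SentinelZero  = -2; // lane is known to be zero

// Merge the masks of OR's two shuffle operands (equal sizes assumed).
// LaneOffset1 plays the role of MaskSize * SrcInputs0.size() in the restored
// code: indices taken from the second shuffle are offset past the first
// shuffle's source operands.
bool mergeOrMasks(const std::vector<int> &Mask0,
                  const std::vector<int> &Mask1,
                  int LaneOffset1, std::vector<int> &Mask) {
  for (std::size_t i = 0; i != Mask0.size(); ++i) {
    if (Mask0[i] == SentinelUndef && Mask1[i] == SentinelUndef)
      Mask.push_back(SentinelUndef);           // both undef: stays undef
    else if (Mask0[i] == SentinelZero && Mask1[i] == SentinelZero)
      Mask.push_back(SentinelZero);            // both zero: stays zero
    else if (Mask1[i] == SentinelZero)
      Mask.push_back(Mask0[i]);                // RHS zero: OR picks the LHS lane
    else if (Mask0[i] == SentinelZero)
      Mask.push_back(Mask1[i] + LaneOffset1);  // LHS zero: OR picks the RHS lane
    else
      return false; // both lanes carry data: a real OR, not a shuffle
  }
  return true;
}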
@@ -34209,7 +34219,6 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
SelectionDAG &DAG, const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &SrcVT, MVT &DstVT,
bool IsUnary) {
- unsigned NumMaskElts = Mask.size();
unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();
if (MaskVT.is128BitVector()) {
@@ -34267,46 +34276,6 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
}
}
- // Attempt to match against a OR if we're performing a blend shuffle and the
- // non-blended source element is zero in each case.
- if ((EltSizeInBits % V1.getScalarValueSizeInBits()) == 0 &&
- (EltSizeInBits % V2.getScalarValueSizeInBits()) == 0) {
- bool IsBlend = true;
- unsigned NumV1Elts = V1.getValueType().getVectorNumElements();
- unsigned NumV2Elts = V2.getValueType().getVectorNumElements();
- unsigned Scale1 = NumV1Elts / NumMaskElts;
- unsigned Scale2 = NumV2Elts / NumMaskElts;
- APInt DemandedZeroV1 = APInt::getNullValue(NumV1Elts);
- APInt DemandedZeroV2 = APInt::getNullValue(NumV2Elts);
- for (unsigned i = 0; i != NumMaskElts; ++i) {
- int M = Mask[i];
- if (M == SM_SentinelUndef)
- continue;
- if (M == SM_SentinelZero) {
- DemandedZeroV1.setBits(i * Scale1, (i + 1) * Scale1);
- DemandedZeroV2.setBits(i * Scale2, (i + 1) * Scale2);
- continue;
- }
- if (M == i) {
- DemandedZeroV2.setBits(i * Scale2, (i + 1) * Scale2);
- continue;
- }
- if (M == (i + NumMaskElts)) {
- DemandedZeroV1.setBits(i * Scale1, (i + 1) * Scale1);
- continue;
- }
- IsBlend = false;
- break;
- }
- if (IsBlend &&
- DAG.computeKnownBits(V1, DemandedZeroV1).isZero() &&
- DAG.computeKnownBits(V2, DemandedZeroV2).isZero()) {
- Shuffle = ISD::OR;
- SrcVT = DstVT = EVT(MaskVT).changeTypeToInteger().getSimpleVT();
- return true;
- }
- }
-
return false;
}
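
The '-' block removed from matchBinaryShuffle above is the reverted feature itself: a blend shuffle is rewritten as a plain OR when every non-selected element is provably zero. The real code proves this with computeKnownBits over the demanded elements; the scalar model below (assumed names, not the LLVM API) just checks the values directly to illustrate the legality condition:

#include <array>
#include <cstdint>
#include <cstdio>

constexpr int N = 4;

// Legality check: the blend Mask[i] in {i, i+N} can be lowered to V1 | V2
// iff, in every lane, the element we do NOT select is zero.
bool canBlendAsOr(const std::array<int, N> &Mask,
                  const std::array<std::uint32_t, N> &V1,
                  const std::array<std::uint32_t, N> &V2) {
  for (int i = 0; i != N; ++i) {
    if (Mask[i] == i) {            // selecting V1[i]: V2[i] must be zero
      if (V2[i] != 0) return false;
    } else if (Mask[i] == i + N) { // selecting V2[i]: V1[i] must be zero
      if (V1[i] != 0) return false;
    } else {
      return false;                // not a lane-aligned blend
    }
  }
  return true;
}

int main() {
  std::array<int, N> Mask = {0, 5, 2, 7};             // take V1,V2,V1,V2
  std::array<std::uint32_t, N> V1 = {0xA, 0, 0xC, 0}; // zero where V2 is taken
  std::array<std::uint32_t, N> V2 = {0, 0xB, 0, 0xD}; // zero where V1 is taken
  if (canBlendAsOr(Mask, V1, V2))
    for (int i = 0; i != N; ++i)   // the blend and the OR agree lane by lane
      std::printf("lane %d: %#x\n", i, static_cast<unsigned>(V1[i] | V2[i]));
  return 0;
}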
diff --git a/llvm/test/CodeGen/X86/insertelement-ones.ll b/llvm/test/CodeGen/X86/insertelement-ones.ll
index 6a9a401264c5..3d8e42b9c07d 100644
--- a/llvm/test/CodeGen/X86/insertelement-ones.ll
+++ b/llvm/test/CodeGen/X86/insertelement-ones.ll
@@ -389,9 +389,11 @@ define <32 x i8> @insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx(<32 x i8> %a) {
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
; SSE2-NEXT: por %xmm4, %xmm0
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
+; SSE2-NEXT: pand %xmm5, %xmm1
; SSE2-NEXT: pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1]
-; SSE2-NEXT: por %xmm3, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm5
+; SSE2-NEXT: por %xmm5, %xmm1
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: por %xmm4, %xmm1
; SSE2-NEXT: retq
@@ -409,9 +411,11 @@ define <32 x i8> @insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx(<32 x i8> %a) {
; SSE3-NEXT: movdqa %xmm3, %xmm4
; SSE3-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
; SSE3-NEXT: por %xmm4, %xmm0
-; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE3-NEXT: movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
+; SSE3-NEXT: pand %xmm5, %xmm1
; SSE3-NEXT: pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1]
-; SSE3-NEXT: por %xmm3, %xmm1
+; SSE3-NEXT: pandn %xmm3, %xmm5
+; SSE3-NEXT: por %xmm5, %xmm1
; SSE3-NEXT: pand %xmm2, %xmm1
; SSE3-NEXT: por %xmm4, %xmm1
; SSE3-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll b/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll
index 9256a43f8e33..6b49f22f21f1 100644
--- a/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll
+++ b/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll
@@ -1314,10 +1314,10 @@ define void @trunc_v4i64_to_v4i8(<32 x i8>* %L, <4 x i8>* %S) nounwind {
define <16 x i8> @negative(<32 x i8> %v, <32 x i8> %w) nounwind {
; AVX1-LABEL: negative:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,u,u,u,0,2,4,6,8,10,12,14]
-; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[u,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u],zero,zero,zero,zero,zero,zero,zero,xmm0[0,2,4,6,8,10,12,14]
+; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX1-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
index 86423ce76065..f448f41cf522 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
@@ -1713,8 +1713,9 @@ define <8 x i16> @shuffle_v8i16_XX4X8acX(<8 x i16> %a, <8 x i16> %b) {
;
; SSSE3-LABEL: shuffle_v8i16_XX4X8acX:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,0,1,4,5,8,9,u,u]
-; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,8,9,u,u],zero,zero,zero,zero,zero,zero,xmm0[u,u]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u],zero,zero,xmm1[u,u,0,1,4,5,8,9,u,u]
+; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_XX4X8acX:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
index e5285aebda69..82df05e5ae06 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -3358,9 +3358,9 @@ define <32 x i8> @shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_
; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = zero,zero,xmm2[u,u],zero,zero,xmm2[12],zero,xmm2[u,u,u],zero,zero,xmm2[u,0,3]
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,13,u,u,3,3],zero,xmm1[8,u,u,u,12,1,u],zero,zero
; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm4[u,u,u,u,u,u,u,u,1,6,13,u,u,u,u,u]
-; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,12,13,u,u,u,u,u,u,u,u,u,12,u,u]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5],xmm0[6,7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm4[u,u],zero,zero,xmm4[u,u,u,u,1,6,13,u,u],zero,xmm4[u,u]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,12,13,u,u,u,u],zero,zero,zero,xmm0[u,u,12,u,u]
+; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255]
; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0