[llvm] r366835 - [X86] In lowerVectorShuffle, instead of creating a new node to canonicalize the shuffle mask by commuting, just commute the mask and swap V1/V2.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 23 11:46:16 PDT 2019
Author: ctopper
Date: Tue Jul 23 11:46:15 2019
New Revision: 366835
URL: http://llvm.org/viewvc/llvm-project?rev=366835&view=rev
Log:
[X86] In lowerVectorShuffle, instead of creating a new node to canonicalize the shuffle mask by commuting, just commute the mask and swap V1/V2.
LegalizeDAG tries to legalize the DAG by legalizing nodes before
their operands.
If we create a new node, we end up legalizing it after its operands.
This prevents some of the optimizations that can be done when the
operand is a build_vector since the build_vector will have been
legalized to something else.
Differential Revision: https://reviews.llvm.org/D65132
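For context, commuting a shuffle in place means rewriting each mask index so it refers to the other operand and then swapping the operand values, rather than asking the DAG to build a fresh VECTOR_SHUFFLE node. Below is a minimal standalone sketch of that transformation in plain C++; the helper name `commuteShuffle` and the use of a template operand type are illustrative assumptions, not the LLVM helpers (`ShuffleVectorSDNode::commuteMask`) themselves.

    #include <utility>
    #include <vector>

    // Sketch of what commuting a shuffle mask plus std::swap(V1, V2) does.
    // Indices in [0, NumElts) select from the first operand, indices in
    // [NumElts, 2*NumElts) select from the second, and -1 means undef.
    template <typename VecT>
    void commuteShuffle(VecT &V1, VecT &V2, std::vector<int> &Mask, int NumElts) {
      for (int &M : Mask) {
        if (M < 0)
          continue; // undef lanes stay undef
        // Retarget the lane to the other operand.
        M = (M < NumElts) ? M + NumElts : M - NumElts;
      }
      // Swap the operands so the shuffle result is unchanged.
      std::swap(V1, V2);
    }

Because the existing shuffle node is reused with a locally commuted mask, no new node is created, so LegalizeDAG's nodes-before-operands ordering is preserved and build_vector operands are still visible when the shuffle is lowered.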
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/oddshuffles.ll
llvm/trunk/test/CodeGen/X86/vector-shift-ashr-sub128.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll
llvm/trunk/test/CodeGen/X86/vector-zext-widen.ll
llvm/trunk/test/CodeGen/X86/vector-zext.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=366835&r1=366834&r2=366835&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Jul 23 11:46:15 2019
@@ -16650,7 +16650,7 @@ static bool canonicalizeShuffleMaskWithC
static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
- ArrayRef<int> Mask = SVOp->getMask();
+ ArrayRef<int> OrigMask = SVOp->getMask();
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
MVT VT = Op.getSimpleValueType();
@@ -16676,8 +16676,8 @@ static SDValue lowerVectorShuffle(SDValu
// undef as well. This makes it easier to match the shuffle based solely on
// the mask.
if (V2IsUndef &&
- any_of(Mask, [NumElements](int M) { return M >= NumElements; })) {
- SmallVector<int, 8> NewMask(Mask.begin(), Mask.end());
+ any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
+ SmallVector<int, 8> NewMask(OrigMask.begin(), OrigMask.end());
for (int &M : NewMask)
if (M >= NumElements)
M = -1;
@@ -16685,15 +16685,16 @@ static SDValue lowerVectorShuffle(SDValu
}
// Check for illegal shuffle mask element index values.
- int MaskUpperLimit = Mask.size() * (V2IsUndef ? 1 : 2); (void)MaskUpperLimit;
- assert(llvm::all_of(Mask,
+ int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
+ (void)MaskUpperLimit;
+ assert(llvm::all_of(OrigMask,
[&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
"Out of bounds shuffle index");
// We actually see shuffles that are entirely re-arrangements of a set of
// zero inputs. This mostly happens while decomposing complex shuffles into
// simple ones. Directly lower these as a buildvector of zeros.
- APInt Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+ APInt Zeroable = computeZeroableShuffleElements(OrigMask, V1, V2);
if (Zeroable.isAllOnesValue())
return getZeroVector(VT, Subtarget, DAG, DL);
@@ -16701,11 +16702,11 @@ static SDValue lowerVectorShuffle(SDValu
// Create an alternative mask with info about zeroable elements.
// Here we do not set undef elements as zeroable.
- SmallVector<int, 64> ZeroableMask(Mask.begin(), Mask.end());
+ SmallVector<int, 64> ZeroableMask(OrigMask.begin(), OrigMask.end());
if (V2IsZero) {
assert(!Zeroable.isNullValue() && "V2's non-undef elements are used?!");
for (int i = 0; i != NumElements; ++i)
- if (Mask[i] != SM_SentinelUndef && Zeroable[i])
+ if (OrigMask[i] != SM_SentinelUndef && Zeroable[i])
ZeroableMask[i] = SM_SentinelZero;
}
@@ -16720,7 +16721,7 @@ static SDValue lowerVectorShuffle(SDValu
// by obfuscating the operands with bitcasts.
// TODO: Avoid lowering directly from this top-level function: make this
// a query (canLowerAsBroadcast) and defer lowering to the type-based calls.
- if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, Mask,
+ if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, OrigMask,
Subtarget, DAG))
return Broadcast;
@@ -16756,8 +16757,11 @@ static SDValue lowerVectorShuffle(SDValu
}
// Commute the shuffle if it will improve canonicalization.
- if (canonicalizeShuffleMaskWithCommute(Mask))
- return DAG.getCommutedVectorShuffle(*SVOp);
+ SmallVector<int, 64> Mask(OrigMask.begin(), OrigMask.end());
+ if (canonicalizeShuffleMaskWithCommute(Mask)) {
+ ShuffleVectorSDNode::commuteMask(Mask);
+ std::swap(V1, V2);
+ }
if (SDValue V = lowerShuffleWithVPMOV(DL, Mask, VT, V1, V2, DAG, Subtarget))
return V;
Modified: llvm/trunk/test/CodeGen/X86/oddshuffles.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/oddshuffles.ll?rev=366835&r1=366834&r2=366835&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/oddshuffles.ll (original)
+++ llvm/trunk/test/CodeGen/X86/oddshuffles.ll Tue Jul 23 11:46:15 2019
@@ -1056,7 +1056,7 @@ define void @interleave_24i16_out(<24 x
; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm5[0,1,2,3,4],xmm4[5,6,7]
; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6,7,8,9,2,3,8,9,14,15]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3,4],xmm0[5],xmm1[6,7]
-; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,10,11,0,1,6,7,12,13,14,15,0,1,2,3]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,10,11,0,1,6,7,12,13,u,u,u,u,u,u]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm2[5,6,7]
; AVX1-NEXT: vmovdqu %xmm3, (%rsi)
; AVX1-NEXT: vmovdqu %xmm4, (%rdx)
@@ -1094,8 +1094,8 @@ define void @interleave_24i16_out(<24 x
; XOP-NEXT: vpperm {{.*#+}} xmm3 = xmm3[0,1,6,7,12,13,2,3,8,9,14,15],xmm2[4,5,10,11]
; XOP-NEXT: vpblendw {{.*#+}} xmm4 = xmm0[0,1],xmm1[2],xmm0[3,4],xmm1[5],xmm0[6,7]
; XOP-NEXT: vpperm {{.*#+}} xmm4 = xmm4[2,3,8,9,14,15,4,5,10,11],xmm2[0,1,6,7,12,13]
-; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[4,5,10,11],xmm1[0,1,6,7,12,13,14,15,0,1,2,3]
-; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9],xmm2[2,3,8,9,14,15]
+; XOP-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3,4],xmm0[5],xmm1[6,7]
+; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[4,5,10,11,0,1,6,7,12,13],xmm2[2,3,8,9,14,15]
; XOP-NEXT: vmovdqu %xmm3, (%rsi)
; XOP-NEXT: vmovdqu %xmm4, (%rdx)
; XOP-NEXT: vmovdqu %xmm0, (%rcx)
@@ -1187,7 +1187,7 @@ define void @interleave_24i16_in(<24 x i
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,2,2]
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0],xmm2[1,2],xmm4[3],xmm2[4,5],xmm4[6],xmm2[7]
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; AVX1-NEXT: vpshufb {{.*#+}} xmm4 = xmm4[4,5,10,11,10,11,8,9,8,9,14,15,12,13,14,15]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm4 = xmm4[4,5,u,u,10,11,8,9,u,u,14,15,12,13,u,u]
; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm3[2,2,3,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0],xmm5[1],xmm4[2,3],xmm5[4],xmm4[5,6],xmm5[7]
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
@@ -1233,9 +1233,8 @@ define void @interleave_24i16_in(<24 x i
; XOP-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; XOP-NEXT: vpperm {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm2[0,1],xmm4[4,5,6,7],xmm2[2,3],xmm4[8,9,10,11]
; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3
-; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm1[10,11],xmm0[12,13,12,13],xmm1[12,13,12,13],xmm0[14,15],xmm1[14,15],xmm0[14,15]
-; XOP-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[2,2,3,3]
-; XOP-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4],xmm0[5,6],xmm1[7]
+; XOP-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[4,5],xmm2[10,11],xmm0[10,11,8,9],xmm2[12,13],xmm0[14,15,12,13],xmm2[14,15]
; XOP-NEXT: vmovdqu %xmm0, 32(%rdi)
; XOP-NEXT: vmovups %ymm3, (%rdi)
; XOP-NEXT: vzeroupper
Modified: llvm/trunk/test/CodeGen/X86/vector-shift-ashr-sub128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shift-ashr-sub128.ll?rev=366835&r1=366834&r2=366835&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shift-ashr-sub128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shift-ashr-sub128.ll Tue Jul 23 11:46:15 2019
@@ -1323,8 +1323,8 @@ define <2 x i16> @splatvar_shift_v2i16(<
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpsllq $48, %xmm0, %xmm0
; XOPAVX1-NEXT: vpshaq {{.*}}(%rip), %xmm0, %xmm0
-; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
+; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
; XOPAVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm1
; XOPAVX1-NEXT: vpshaq %xmm1, %xmm0, %xmm0
@@ -1334,8 +1334,8 @@ define <2 x i16> @splatvar_shift_v2i16(<
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpsllq $48, %xmm0, %xmm0
; XOPAVX2-NEXT: vpshaq {{.*}}(%rip), %xmm0, %xmm0
-; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; XOPAVX2-NEXT: vpbroadcastq %xmm1, %xmm1
+; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
; XOPAVX2-NEXT: vpsubq %xmm1, %xmm2, %xmm1
; XOPAVX2-NEXT: vpshaq %xmm1, %xmm0, %xmm0
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll?rev=366835&r1=366834&r2=366835&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll Tue Jul 23 11:46:15 2019
@@ -718,20 +718,23 @@ define <2 x i64> @shuffle_v2i64_z0(<2 x
define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
; SSE2-LABEL: shuffle_v2i64_z1:
; SSE2: # %bb.0:
-; SSE2-NEXT: xorpd %xmm1, %xmm1
-; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_z1:
; SSE3: # %bb.0:
-; SSE3-NEXT: xorpd %xmm1, %xmm1
-; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE3-NEXT: xorps %xmm1, %xmm1
+; SSE3-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_z1:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: xorpd %xmm1, %xmm1
-; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSSE3-NEXT: xorps %xmm1, %xmm1
+; SSSE3-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_z1:
Modified: llvm/trunk/test/CodeGen/X86/vector-zext-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-zext-widen.ll?rev=366835&r1=366834&r2=366835&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-zext-widen.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-zext-widen.ll Tue Jul 23 11:46:15 2019
@@ -1999,19 +1999,16 @@ define <8 x i32> @shuf_zext_16i16_to_8i3
;
; SSE41-LABEL: shuf_zext_16i16_to_8i32_offset8:
; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
; SSE41-NEXT: pxor %xmm2, %xmm2
-; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
-; SSE41-NEXT: movdqa %xmm2, %xmm1
+; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuf_zext_16i16_to_8i32_offset8:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/vector-zext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-zext.ll?rev=366835&r1=366834&r2=366835&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-zext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-zext.ll Tue Jul 23 11:46:15 2019
@@ -2057,19 +2057,16 @@ define <8 x i32> @shuf_zext_16i16_to_8i3
;
; SSE41-LABEL: shuf_zext_16i16_to_8i32_offset8:
; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
; SSE41-NEXT: pxor %xmm2, %xmm2
-; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
-; SSE41-NEXT: movdqa %xmm2, %xmm1
+; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuf_zext_16i16_to_8i32_offset8:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq