[llvm] r371307 - [X86] Fix pshuflw formation from repeated shuffle mask (PR43230)

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Sat Sep 7 05:13:44 PDT 2019


Author: nikic
Date: Sat Sep  7 05:13:44 2019
New Revision: 371307

URL: http://llvm.org/viewvc/llvm-project?rev=371307&view=rev
Log:
[X86] Fix pshuflw formation from repeated shuffle mask (PR43230)

Fix for https://bugs.llvm.org/show_bug.cgi?id=43230.

When creating PSHUFLW from a repeated shuffle mask, we have to apply
the checks to the repeated mask, not the original one. For the test
case from PR43230 the inspected part of the original mask is all undef.

Differential Revision: https://reviews.llvm.org/D67314

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=371307&r1=371306&r2=371307&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Sep  7 05:13:44 2019
@@ -32041,8 +32041,8 @@ static bool matchUnaryPermuteShuffle(MVT
   if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits == 16) {
     SmallVector<int, 4> RepeatedMask;
     if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) {
-      ArrayRef<int> LoMask(Mask.data() + 0, 4);
-      ArrayRef<int> HiMask(Mask.data() + 4, 4);
+      ArrayRef<int> LoMask(RepeatedMask.data() + 0, 4);
+      ArrayRef<int> HiMask(RepeatedMask.data() + 4, 4);
 
       // PSHUFLW: permute lower 4 elements only.
       if (isUndefOrInRange(LoMask, 0, 4) &&

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll?rev=371307&r1=371306&r2=371307&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll Sat Sep  7 05:13:44 2019
@@ -4777,23 +4777,14 @@ define <16 x i16> @pr43230(<16 x i16> %a
 ; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
-; AVX2-SLOW-LABEL: pr43230:
-; AVX2-SLOW:       # %bb.0:
-; AVX2-SLOW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX2-SLOW-NEXT:    vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
-; AVX2-SLOW-NEXT:    vpunpckhwd {{.*#+}} ymm0 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
-; AVX2-SLOW-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
-; AVX2-SLOW-NEXT:    vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[26,27],zero,zero
-; AVX2-SLOW-NEXT:    retq
-;
-; AVX2-FAST-LABEL: pr43230:
-; AVX2-FAST:       # %bb.0:
-; AVX2-FAST-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX2-FAST-NEXT:    vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
-; AVX2-FAST-NEXT:    vpunpckhwd {{.*#+}} ymm0 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
-; AVX2-FAST-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
-; AVX2-FAST-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-FAST-NEXT:    retq
+; AVX2-LABEL: pr43230:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
+; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm0 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
+; AVX2-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[26,27],zero,zero
+; AVX2-NEXT:    retq
 ;
 ; AVX512VL-LABEL: pr43230:
 ; AVX512VL:       # %bb.0:




More information about the llvm-commits mailing list