[llvm] r219081 - [x86] Fix PR21139, one of the last remaining regressions found in the
Chandler Carruth
chandlerc at gmail.com
Sun Oct 5 05:07:35 PDT 2014
Author: chandlerc
Date: Sun Oct 5 07:07:34 2014
New Revision: 219081
URL: http://llvm.org/viewvc/llvm-project?rev=219081&view=rev
Log:
[x86] Fix PR21139, one of the last remaining regressions found in the
new vector shuffle lowering.
This is loosely based on a patch by Marius Wachtler to the PR (thanks!).
I refactored it a bi to use std::count_if and a mutable array ref but
the core idea was exactly right. I also added some direct testing of
this case.
I believe PR21137 is now the only remaining regression.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll
llvm/trunk/test/CodeGen/X86/vector-zext.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=219081&r1=219080&r2=219081&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Oct 5 07:07:34 2014
@@ -9278,21 +9278,29 @@ static SDValue lowerV16I8VectorShuffle(S
//
// FIXME: We need to handle other interleaving widths (i16, i32, ...).
if (shouldLowerAsInterleaving(Mask)) {
- // FIXME: Figure out whether we should pack these into the low or high
- // halves.
-
- int EMask[16], OMask[16];
+ int NumLoHalf = std::count_if(Mask.begin(), Mask.end(), [](int M) {
+ return (M >= 0 && M < 8) || (M >= 16 && M < 24);
+ });
+ int NumHiHalf = std::count_if(Mask.begin(), Mask.end(), [](int M) {
+ return (M >= 8 && M < 16) || M >= 24;
+ });
+ int EMask[16] = {-1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1};
+ int OMask[16] = {-1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1};
+ bool UnpackLo = NumLoHalf >= NumHiHalf;
+ MutableArrayRef<int> TargetEMask(UnpackLo ? EMask : EMask + 8, 8);
+ MutableArrayRef<int> TargetOMask(UnpackLo ? OMask : OMask + 8, 8);
for (int i = 0; i < 8; ++i) {
- EMask[i] = Mask[2*i];
- OMask[i] = Mask[2*i + 1];
- EMask[i + 8] = -1;
- OMask[i + 8] = -1;
+ TargetEMask[i] = Mask[2 * i];
+ TargetOMask[i] = Mask[2 * i + 1];
}
SDValue Evens = DAG.getVectorShuffle(MVT::v16i8, DL, V1, V2, EMask);
SDValue Odds = DAG.getVectorShuffle(MVT::v16i8, DL, V1, V2, OMask);
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, Evens, Odds);
+ return DAG.getNode(UnpackLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL,
+ MVT::v16i8, Evens, Odds);
}
// Check for SSSE3 which lets us lower all v16i8 shuffles much more directly
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll?rev=219081&r1=219080&r2=219081&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll Sun Oct 5 07:07:34 2014
@@ -232,6 +232,20 @@ define <16 x i8> @shuffle_v16i8_00_16_01
ret <16 x i8> %shuffle
}
+define <16 x i8> @shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31(<16 x i8> %a, <16 x i8> %b) {
+; SSE-LABEL: shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31:
+; SSE: # BB#0:
+; SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31:
+; AVX: # BB#0:
+; AVX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; AVX-NEXT: retq
+ %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+ ret <16 x i8> %shuffle
+}
+
define <16 x i8> @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
; SSE: # BB#0:
@@ -373,7 +387,7 @@ define <16 x i8> @shuffle_v16i8_03_02_01
define <16 x i8> @trunc_v4i32_shuffle(<16 x i8> %a) {
; SSE2-LABEL: trunc_v4i32_shuffle:
; SSE2: # BB#0:
-; SSE2-NEXT: pand .LCPI13_0(%rip), %xmm0
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: retq
@@ -444,7 +458,7 @@ entry:
define <16 x i8> @PR20540(<8 x i8> %a) {
; SSE2-LABEL: PR20540:
; SSE2: # BB#0:
-; SSE2-NEXT: pand .LCPI16_0(%rip), %xmm0
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
@@ -767,7 +781,7 @@ define <16 x i8> @shuffle_v16i8_17_18_19
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[1,2,3,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,6,7,7]
-; SSE2-NEXT: pand .LCPI23_0(%rip), %xmm0
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,4]
; SSE2-NEXT: movdqa %xmm1, %xmm3
@@ -821,7 +835,7 @@ define <16 x i8> @shuffle_v16i8_01_02_03
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,7]
-; SSE2-NEXT: pand .LCPI24_0(%rip), %xmm1
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,4]
; SSE2-NEXT: movdqa %xmm0, %xmm3
Modified: llvm/trunk/test/CodeGen/X86/vector-zext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-zext.ll?rev=219081&r1=219080&r2=219081&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-zext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-zext.ll Sun Oct 5 07:07:34 2014
@@ -191,9 +191,8 @@ define <16 x i16> @zext_16i8_to_16i16(<1
;
; AVX1-LABEL: zext_16i8_to_16i16:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX1-NEXT: vpmovzxbw %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
More information about the llvm-commits
mailing list