[llvm] r225551 - [X86][SSE] Avoid vector byte shuffles with zero by using pshufb to create zeros
Patrik Hägglund H
patrik.h.hagglund at ericsson.com
Mon Jan 12 05:48:13 PST 2015
Hi Simon,
This commit is causing a regression, found using llvm-stress:
bin/llvm-stress -size 250 -seed 25104 | bin/llc -march=x86-64 -mcpu=corei7 -o /dev/null
llc: ../lib/Target/X86/X86ISelLowering.cpp:9621: llvm::SDValue lowerV16I8VectorShuffle(llvm::SDValue, llvm::SDValue, llvm::SDValue, const llvm::X86Subtarget *, llvm::SelectionDAG &): Assertion `(V1InUse || V2InUse) && "Shuffling to a zeroable vector"' failed.
0 llc 0x0000000001318408 llvm::sys::PrintStackTrace(_IO_FILE*) + 40
1 llc 0x00000000013199eb
2 libpthread.so.0 0x00007fba2db7c810
3 libc.so.6 0x00007fba2d397755 gsignal + 53
4 libc.so.6 0x00007fba2d398d31 abort + 385
5 libc.so.6 0x00007fba2d390610 __assert_fail + 240
6 llc 0x0000000000bfe8e9
7 llc 0x0000000000b90de6 llvm::X86TargetLowering::LowerVECTOR_SHUFFLE(llvm::SDValue, llvm::SelectionDAG&) const + 5270
8 llc 0x0000000000bb71aa llvm::X86TargetLowering::LowerOperation(llvm::SDValue, llvm::SelectionDAG&) const + 6042
9 llc 0x00000000011681ef
10 llc 0x000000000116740f llvm::SelectionDAG::Legalize() + 479
11 llc 0x00000000012222fb llvm::SelectionDAGISel::CodeGenAndEmitDAG() + 2827
12 llc 0x0000000001221008 llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) + 8648
13 llc 0x000000000121dda4 llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) + 1316
14 llc 0x0000000000b67ec6
15 llc 0x0000000000daf49c llvm::MachineFunctionPass::runOnFunction(llvm::Function&) + 124
16 llc 0x0000000000fd356c llvm::FPPassManager::runOnFunction(llvm::Function&) + 524
17 llc 0x0000000000fd37ab llvm::FPPassManager::runOnModule(llvm::Module&) + 43
18 llc 0x0000000000fd3ca7 llvm::legacy::PassManagerImpl::run(llvm::Module&) + 935
19 llc 0x000000000058c595 main + 6645
20 libc.so.6 0x00007fba2d383c16 __libc_start_main + 230
21 llc 0x0000000000588ef1
Stack dump:
0. Program arguments: bin/llc -march=x86-64 -mcpu=corei7 -o /dev/null
1. Running pass 'Function Pass Manager' on module '<stdin>'.
2. Running pass 'X86 DAG->DAG Instruction Selection' on function '@autogen_SD25104'
Abort (core dumped)
Here is a bugpoint-simplified input file:
; ModuleID = 'bugpoint-reduced-simplified.bc'
target triple = "x86_64-unknown-linux-gnu"
define void @autogen_SD25104() {
BB:
%Tr = trunc <16 x i64> zeroinitializer to <16 x i8>
%Shuff30 = shufflevector <16 x i8> %Tr, <16 x i8> %Tr, <16 x i32> <i32 28, i32 30, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 undef, i32 14, i32 16, i32 18, i32 20
, i32 22, i32 24, i32 26>
%Cmp34 = icmp eq <16 x i8> %Shuff30, %Shuff30
br label %CF198
CF198: ; preds = %CF198, %BB
br i1 undef, label %CF198, label %CF203
CF203: ; preds = %CF203, %CF198
%I73 = insertelement <16 x i1> %Cmp34, i1 undef, i32 6
br label %CF203
}
/Patrik Hägglund
-----Original Message-----
From: llvm-commits-bounces at cs.uiuc.edu [mailto:llvm-commits-bounces at cs.uiuc.edu] On Behalf Of Simon Pilgrim
Sent: den 9 januari 2015 23:03
To: llvm-commits at cs.uiuc.edu
Subject: [llvm] r225551 - [X86][SSE] Avoid vector byte shuffles with zero by using pshufb to create zeros
Author: rksimon
Date: Fri Jan 9 16:03:19 2015
New Revision: 225551
URL: http://llvm.org/viewvc/llvm-project?rev=225551&view=rev
Log:
[X86][SSE] Avoid vector byte shuffles with zero by using pshufb to create zeros
pshufb can shuffle in zero bytes as well as bytes from a source vector - we can use this to avoid having to shuffle 2 vectors and ORing the result when the used inputs from a vector are all zeroable.
Differential Revision: http://reviews.llvm.org/D6878
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=225551&r1=225550&r2=225551&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Jan 9 16:03:19 2015
@@ -9599,23 +9599,41 @@ static SDValue lowerV16I8VectorShuffle(S
if (Subtarget->hasSSSE3()) {
SDValue V1Mask[16];
SDValue V2Mask[16];
- for (int i = 0; i < 16; ++i)
+ bool V1InUse = false;
+ bool V2InUse = false;
+ SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+
+ for (int i = 0; i < 16; ++i) {
if (Mask[i] == -1) {
V1Mask[i] = V2Mask[i] = DAG.getUNDEF(MVT::i8);
} else {
- V1Mask[i] = DAG.getConstant(Mask[i] < 16 ? Mask[i] : 0x80, MVT::i8);
- V2Mask[i] =
- DAG.getConstant(Mask[i] < 16 ? 0x80 : Mask[i] - 16, MVT::i8);
+ const int ZeroMask = 0x80;
+ int V1Idx = (Mask[i] < 16 ? Mask[i] : ZeroMask);
+ int V2Idx = (Mask[i] < 16 ? ZeroMask : Mask[i] - 16);
+ if (Zeroable[i])
+ V1Idx = V2Idx = ZeroMask;
+ V1Mask[i] = DAG.getConstant(V1Idx, MVT::i8);
+ V2Mask[i] = DAG.getConstant(V2Idx, MVT::i8);
+ V1InUse |= (ZeroMask != V1Idx);
+ V2InUse |= (ZeroMask != V2Idx);
}
- V1 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, V1,
- DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V1Mask));
- if (isSingleInputShuffleMask(Mask))
- return V1; // Single inputs are easy.
+ }
+ assert((V1InUse || V2InUse) && "Shuffling to a zeroable vector");
- // Otherwise, blend the two.
- V2 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, V2,
- DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V2Mask));
- return DAG.getNode(ISD::OR, DL, MVT::v16i8, V1, V2);
+ if (V1InUse)
+ V1 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V1Mask));
+ if (V2InUse)
+ V2 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V2Mask));
+
+ // If we need shuffled inputs from both, blend the two.
+ if (V1InUse && V2InUse)
+ return DAG.getNode(ISD::OR, DL, MVT::v16i8, V1, V2);
+ if (V1InUse)
+ return V1; // Single inputs are easy.
+ if (V2InUse)
+ return V2; // Single inputs are easy.
}
// There are special ways we can lower some single-element blends.
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll?rev=225551&r1=225550&r2=225551&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll Fri Jan 9 16:03:19 2015
@@ -467,32 +467,23 @@ define <16 x i8> @PR20540(<8 x i8> %a) {
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
; SSE2-NEXT: packuswb %xmm1, %xmm0
; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: PR20540:
-; SSSE3: # BB#0:
-; SSSE3-NEXT: pxor %xmm1, %xmm1
-; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,0,0,0,0,0,0,0]
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
-; SSSE3-NEXT: por %xmm1, %xmm0
-; SSSE3-NEXT: retq
-;
-; SSE41-LABEL: PR20540:
-; SSE41: # BB#0:
-; SSE41-NEXT: pxor %xmm1, %xmm1
-; SSE41-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,0,0,0,0,0,0,0]
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
-; SSE41-NEXT: por %xmm1, %xmm0
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: PR20540:
-; AVX: # BB#0:
-; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,0,0,0,0,0,0,0]
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
- %shuffle = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
- ret <16 x i8> %shuffle
+;
+; SSSE3-LABEL: PR20540:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: PR20540:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: PR20540:
+; AVX: # BB#0:
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT: retq
+ %shuffle = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+ ret <16 x i8> %shuffle
}
define <16 x i8> @shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
@@ -502,34 +493,25 @@ define <16 x i8> @shuffle_v16i8_16_zz_zz
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; SSSE3: # BB#0:
-; SSSE3-NEXT: movd %edi, %xmm0
-; SSSE3-NEXT: pxor %xmm1, %xmm1
-; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSSE3-NEXT: por %xmm1, %xmm0
-; SSSE3-NEXT: retq
-;
-; SSE41-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; SSE41: # BB#0:
-; SSE41-NEXT: movd %edi, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm1
-; SSE41-NEXT: pshufb {{.*#+}} xmm1 = zero,xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSE41-NEXT: por %xmm1, %xmm0
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; AVX: # BB#0:
-; AVX-NEXT: vmovd %edi, %xmm0
-; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpshufb {{.*#+}} xmm1 = zero,xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
-; AVX-NEXT: retq
- %a = insertelement <16 x i8> undef, i8 %i, i32 0
- %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSSE3-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movd %edi, %xmm0
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; SSE41: # BB#0:
+; SSE41-NEXT: movd %edi, %xmm0
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; AVX: # BB#0:
+; AVX-NEXT: vmovd %edi, %xmm0
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT: retq
+ %a = insertelement <16 x i8> undef, i8 %i, i32 0
+ %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i8> %shuffle
}
@@ -541,34 +523,25 @@ define <16 x i8> @shuffle_v16i8_zz_zz_zz
; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; SSSE3: # BB#0:
-; SSSE3-NEXT: movd %edi, %xmm0
-; SSSE3-NEXT: pxor %xmm1, %xmm1
-; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,0,0,0,0],zero,xmm1[0,0,0,0,0,0,0,0,0,0]
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSSE3-NEXT: por %xmm1, %xmm0
-; SSSE3-NEXT: retq
-;
-; SSE41-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; SSE41: # BB#0:
-; SSE41-NEXT: movd %edi, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm1
-; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,0,0,0,0],zero,xmm1[0,0,0,0,0,0,0,0,0,0]
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSE41-NEXT: por %xmm1, %xmm0
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; AVX: # BB#0:
-; AVX-NEXT: vmovd %edi, %xmm0
-; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,0,0,0,0],zero,xmm1[0,0,0,0,0,0,0,0,0,0]
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
-; AVX-NEXT: retq
- %a = insertelement <16 x i8> undef, i8 %i, i32 0
- %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+; SSSE3-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movd %edi, %xmm0
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; SSE41: # BB#0:
+; SSE41-NEXT: movd %edi, %xmm0
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; AVX: # BB#0:
+; AVX-NEXT: vmovd %edi, %xmm0
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT: retq
+ %a = insertelement <16 x i8> undef, i8 %i, i32 0
+ %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <16 x i8> %shuffle
}
@@ -598,36 +571,27 @@ define <16 x i8> @shuffle_v16i8_zz_zz_19
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; SSSE3: # BB#0:
-; SSSE3-NEXT: movd %edi, %xmm0
-; SSSE3-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
-; SSSE3-NEXT: pxor %xmm1, %xmm1
-; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1],zero,xmm1[3,4,5,6,7,8,9,10,11,12,13,14,15]
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSSE3-NEXT: por %xmm1, %xmm0
-; SSSE3-NEXT: retq
-;
-; SSE41-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; SSE41: # BB#0:
-; SSE41-NEXT: movd %edi, %xmm0
-; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
-; SSE41-NEXT: pxor %xmm1, %xmm1
-; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1],zero,xmm1[3,4,5,6,7,8,9,10,11,12,13,14,15]
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSE41-NEXT: por %xmm1, %xmm0
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; AVX: # BB#0:
-; AVX-NEXT: vmovd %edi, %xmm0
-; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
-; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1],zero,xmm1[3,4,5,6,7,8,9,10,11,12,13,14,15]
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
-; AVX-NEXT: retq
- %a = insertelement <16 x i8> undef, i8 %i, i32 3
- %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 19, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movd %edi, %xmm0
+; SSSE3-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; SSE41: # BB#0:
+; SSE41-NEXT: movd %edi, %xmm0
+; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; AVX: # BB#0:
+; AVX-NEXT: vmovd %edi, %xmm0
+; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT: retq
+ %a = insertelement <16 x i8> undef, i8 %i, i32 3
+ %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 19, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i8> %shuffle
}
_______________________________________________
llvm-commits mailing list
llvm-commits at cs.uiuc.edu
http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list