[llvm] r234004 - [DAGCombiner] Combine shuffles of BUILD_VECTOR and SCALAR_TO_VECTOR
Simon Pilgrim
llvm-dev at redking.me.uk
Fri Apr 3 03:02:22 PDT 2015
Author: rksimon
Date: Fri Apr 3 05:02:21 2015
New Revision: 234004
URL: http://llvm.org/viewvc/llvm-project?rev=234004&view=rev
Log:
[DAGCombiner] Combine shuffles of BUILD_VECTOR and SCALAR_TO_VECTOR
This patch attempts to fold the shuffling of 'scalar source' inputs - BUILD_VECTOR and SCALAR_TO_VECTOR nodes - if the shuffle node is the only user. This folds away a lot of unnecessary shuffle nodes, and allows quite a bit of constant folding that was being missed.
Differential Revision: http://reviews.llvm.org/D8516
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/AArch64/arm64-neon-copy.ll
llvm/trunk/test/CodeGen/AArch64/arm64-vshuffle.ll
llvm/trunk/test/CodeGen/PowerPC/vperm-lowering.ll
llvm/trunk/test/CodeGen/X86/mmx-bitcast.ll
llvm/trunk/test/CodeGen/X86/sse41.ll
llvm/trunk/test/CodeGen/X86/vec_insert-5.ll
llvm/trunk/test/CodeGen/X86/vec_insert-mmx.ll
llvm/trunk/test/CodeGen/X86/vec_zero_cse.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=234004&r1=234003&r2=234004&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Apr 3 05:02:21 2015
@@ -11980,6 +11980,43 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE
return V;
}
+ // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
+ // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
+ if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
+ SmallVector<SDValue, 8> Ops;
+ for (int M : SVN->getMask()) {
+ SDValue Op = DAG.getUNDEF(VT.getScalarType());
+ if (M >= 0) {
+ int Idx = M % NumElts;
+ SDValue &S = (M < (int)NumElts ? N0 : N1);
+ if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) {
+ Op = S.getOperand(Idx);
+ } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) {
+ if (Idx == 0)
+ Op = S.getOperand(0);
+ } else {
+ // Operand can't be combined - bail out.
+ break;
+ }
+ }
+ Ops.push_back(Op);
+ }
+ if (Ops.size() == VT.getVectorNumElements()) {
+ // BUILD_VECTOR requires all inputs to be of the same type, find the
+ // maximum type and extend them all.
+ EVT SVT = VT.getScalarType();
+ if (SVT.isInteger())
+ for (SDValue &Op : Ops)
+ SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
+ if (SVT != VT.getScalarType())
+ for (SDValue &Op : Ops)
+ Op = TLI.isZExtFree(Op.getValueType(), SVT)
+ ? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT)
+ : DAG.getSExtOrTrunc(Op, SDLoc(N), SVT);
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Ops);
+ }
+ }
+
// If this shuffle only has a single input that is a bitcasted shuffle,
// attempt to merge the 2 shuffles and suitably bitcast the inputs/output
// back to their original types.
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-neon-copy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-neon-copy.ll?rev=234004&r1=234003&r2=234004&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-neon-copy.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-neon-copy.ll Fri Apr 3 05:02:21 2015
@@ -1086,7 +1086,7 @@ define <2 x i32> @test_concat_diff_v1i32
; CHECK-LABEL: test_concat_diff_v1i32_v1i32:
; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
-; CHECK-NEXT: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: ins {{v[0-9]+}}.s[1], w{{[0-9]+}}
entry:
%c = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
%d = insertelement <2 x i32> undef, i32 %c, i32 0
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-vshuffle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-vshuffle.ll?rev=234004&r1=234003&r2=234004&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-vshuffle.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-vshuffle.ll Fri Apr 3 05:02:21 2015
@@ -1,22 +1,8 @@
; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -mcpu=cyclone | FileCheck %s
-; The mask:
-; CHECK: lCPI0_0:
-; CHECK: .byte 2 ; 0x2
-; CHECK: .byte 255 ; 0xff
-; CHECK: .byte 6 ; 0x6
-; CHECK: .byte 255 ; 0xff
-; The second vector is legalized to undef and the elements of the first vector
-; are used instead.
-; CHECK: .byte 2 ; 0x2
-; CHECK: .byte 4 ; 0x4
-; CHECK: .byte 6 ; 0x6
-; CHECK: .byte 0 ; 0x0
; CHECK: test1
-; CHECK: ldr d[[REG0:[0-9]+]], [{{.*}}, lCPI0_0
-; CHECK: movi.8h v[[REG1:[0-9]+]], #0x1, lsl #8
-; CHECK: tbl.8b v{{[0-9]+}}, { v[[REG1]] }, v[[REG0]]
+; CHECK: movi d[[REG0:[0-9]+]], #0000000000000000
define <8 x i1> @test1() {
entry:
%Shuff = shufflevector <8 x i1> <i1 0, i1 1, i1 2, i1 3, i1 4, i1 5, i1 6,
@@ -30,18 +16,16 @@ entry:
; CHECK: lCPI1_0:
; CHECK: .byte 0 ; 0x0
-; CHECK: .byte 255 ; 0xff
-; CHECK: .byte 2 ; 0x2
-; CHECK: .byte 255 ; 0xff
-; CHECK: .byte 10 ; 0xa
-; CHECK: .byte 12 ; 0xc
-; CHECK: .byte 14 ; 0xe
-; CHECK: .byte 7 ; 0x7
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 1 ; 0x1
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
; CHECK: test2
-; CHECK: ldr d[[REG0:[0-9]+]], [{{.*}}, lCPI1_0 at PAGEOFF]
-; CHECK: adrp x[[REG2:[0-9]+]], lCPI1_1 at PAGE
-; CHECK: ldr q[[REG1:[0-9]+]], [x[[REG2]], lCPI1_1 at PAGEOFF]
-; CHECK: tbl.8b v{{[0-9]+}}, { v[[REG1]] }, v[[REG0]]
+; CHECK: adrp x[[REG2:[0-9]+]], lCPI1_0 at PAGE
+; CHECK: ldr d[[REG1:[0-9]+]], [x[[REG2]], lCPI1_0 at PAGEOFF]
define <8 x i1>@test2() {
bb:
%Shuff = shufflevector <8 x i1> zeroinitializer,
@@ -51,28 +35,8 @@ bb:
ret <8 x i1> %Shuff
}
-; CHECK: lCPI2_0:
-; CHECK: .byte 2 ; 0x2
-; CHECK: .byte 255 ; 0xff
-; CHECK: .byte 6 ; 0x6
-; CHECK: .byte 255 ; 0xff
-; CHECK: .byte 10 ; 0xa
-; CHECK: .byte 12 ; 0xc
-; CHECK: .byte 14 ; 0xe
-; CHECK: .byte 0 ; 0x0
-; CHECK: .byte 2 ; 0x2
-; CHECK: .byte 255 ; 0xff
-; CHECK: .byte 6 ; 0x6
-; CHECK: .byte 255 ; 0xff
-; CHECK: .byte 10 ; 0xa
-; CHECK: .byte 12 ; 0xc
-; CHECK: .byte 14 ; 0xe
-; CHECK: .byte 0 ; 0x0
; CHECK: test3
-; CHECK: adrp x[[REG3:[0-9]+]], lCPI2_0 at PAGE
-; CHECK: ldr q[[REG0:[0-9]+]], [x[[REG3]], lCPI2_0 at PAGEOFF]
-; CHECK: ldr q[[REG1:[0-9]+]], [x[[REG3]], lCPI2_1 at PAGEOFF]
-; CHECK: tbl.16b v{{[0-9]+}}, { v[[REG1]] }, v[[REG0]]
+; CHECK: movi.4s v{{[0-9]+}}, #0x1
define <16 x i1> @test3(i1* %ptr, i32 %v) {
bb:
%Shuff = shufflevector <16 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0>, <16 x i1> undef,
@@ -81,29 +45,26 @@ bb:
i32 14, i32 0>
ret <16 x i1> %Shuff
}
-; CHECK: lCPI3_1:
+; CHECK: lCPI3_0:
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
; CHECK: .byte 0 ; 0x0
; CHECK: .byte 1 ; 0x1
-; CHECK: .byte 2 ; 0x2
-; CHECK: .byte 18 ; 0x12
-; CHECK: .byte 4 ; 0x4
-; CHECK: .byte 5 ; 0x5
-; CHECK: .byte 6 ; 0x6
-; CHECK: .byte 7 ; 0x7
-; CHECK: .byte 8 ; 0x8
-; CHECK: .byte 31 ; 0x1f
-; CHECK: .byte 10 ; 0xa
-; CHECK: .byte 30 ; 0x1e
-; CHECK: .byte 12 ; 0xc
-; CHECK: .byte 13 ; 0xd
-; CHECK: .byte 14 ; 0xe
-; CHECK: .byte 15 ; 0xf
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
; CHECK: _test4:
-; CHECK: ldr q[[REG1:[0-9]+]]
-; CHECK: movi.2d v[[REG0:[0-9]+]], #0000000000000000
-; CHECK: adrp x[[REG3:[0-9]+]], lCPI3_1 at PAGE
-; CHECK: ldr q[[REG2:[0-9]+]], [x[[REG3]], lCPI3_1 at PAGEOFF]
-; CHECK: tbl.16b v{{[0-9]+}}, { v[[REG0]], v[[REG1]] }, v[[REG2]]
+; CHECK: adrp x[[REG3:[0-9]+]], lCPI3_0 at PAGE
+; CHECK: ldr q[[REG2:[0-9]+]], [x[[REG3]], lCPI3_0 at PAGEOFF]
define <16 x i1> @test4(i1* %ptr, i32 %v) {
bb:
%Shuff = shufflevector <16 x i1> zeroinitializer,
Modified: llvm/trunk/test/CodeGen/PowerPC/vperm-lowering.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vperm-lowering.ll?rev=234004&r1=234003&r2=234004&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vperm-lowering.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vperm-lowering.ll Fri Apr 3 05:02:21 2015
@@ -9,58 +9,23 @@ define <16 x i8> @foo() nounwind ssp {
}
; CHECK: .LCPI0_0:
-; CHECK: .byte 31
-; CHECK: .byte 26
-; CHECK: .byte 21
-; CHECK: .byte 16
-; CHECK: .byte 11
-; CHECK: .byte 6
-; CHECK: .byte 1
-; CHECK: .byte 28
-; CHECK: .byte 23
-; CHECK: .byte 18
-; CHECK: .byte 13
-; CHECK: .byte 8
-; CHECK: .byte 3
-; CHECK: .byte 30
-; CHECK: .byte 25
-; CHECK: .byte 20
-; CHECK: .LCPI0_1:
; CHECK: .byte 0
-; CHECK: .byte 1
-; CHECK: .byte 2
-; CHECK: .byte 3
-; CHECK: .byte 4
; CHECK: .byte 5
-; CHECK: .byte 6
-; CHECK: .byte 7
-; CHECK: .byte 8
-; CHECK: .byte 9
; CHECK: .byte 10
-; CHECK: .byte 11
-; CHECK: .byte 12
-; CHECK: .byte 13
-; CHECK: .byte 14
; CHECK: .byte 15
-; CHECK: .LCPI0_2:
-; CHECK: .byte 16
-; CHECK: .byte 17
-; CHECK: .byte 18
-; CHECK: .byte 19
; CHECK: .byte 20
-; CHECK: .byte 21
-; CHECK: .byte 22
-; CHECK: .byte 23
-; CHECK: .byte 24
; CHECK: .byte 25
-; CHECK: .byte 26
-; CHECK: .byte 27
-; CHECK: .byte 28
-; CHECK: .byte 29
; CHECK: .byte 30
-; CHECK: .byte 31
+; CHECK: .byte 3
+; CHECK: .byte 8
+; CHECK: .byte 13
+; CHECK: .byte 18
+; CHECK: .byte 23
+; CHECK: .byte 28
+; CHECK: .byte 1
+; CHECK: .byte 6
+; CHECK: .byte 11
; CHECK: foo:
-; CHECK: addis [[REG1:[0-9]+]], 2, .LCPI0_2 at toc@ha
-; CHECK: addi [[REG2:[0-9]+]], [[REG1]], .LCPI0_2 at toc@l
+; CHECK: addis [[REG1:[0-9]+]], 2, .LCPI0_0 at toc@ha
+; CHECK: addi [[REG2:[0-9]+]], [[REG1]], .LCPI0_0 at toc@l
; CHECK: lvx [[REG3:[0-9]+]], 0, [[REG2]]
-; CHECK: vperm {{[0-9]+}}, [[REG3]], {{[0-9]+}}, {{[0-9]+}}
Modified: llvm/trunk/test/CodeGen/X86/mmx-bitcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mmx-bitcast.ll?rev=234004&r1=234003&r2=234004&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mmx-bitcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mmx-bitcast.ll Fri Apr 3 05:02:21 2015
@@ -75,8 +75,7 @@ define i64 @t5(i32 %a, i32 %b) nounwind
; CHECK-NEXT: movd
; CHECK-NEXT: movd
; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
-; CHECK-NEXT: movd %xmm0, %rax
+; CHECK-NEXT: movd %xmm1, %rax
; CHECK-NEXT: retq
%v0 = insertelement <2 x i32> undef, i32 %a, i32 0
%v1 = insertelement <2 x i32> %v0, i32 %b, i32 1
Modified: llvm/trunk/test/CodeGen/X86/sse41.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41.ll?rev=234004&r1=234003&r2=234004&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41.ll Fri Apr 3 05:02:21 2015
@@ -1026,29 +1026,24 @@ define <4 x float> @pr20087(<4 x float>
}
; Edge case for insertps where we end up with a shuffle with mask=<0, 7, -1, -1>
-define void @insertps_pr20411(i32* noalias nocapture %RET) #1 {
+define void @insertps_pr20411(<4 x i32> %shuffle109, <4 x i32> %shuffle116, i32* noalias nocapture %RET) #1 {
; X32-LABEL: insertps_pr20411:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: pshufd {{.*#+}} xmm0 = mem[2,3,0,1]
-; X32-NEXT: pshufd {{.*#+}} xmm1 = mem[3,1,2,3]
-; X32-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
+; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; X32-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; X32-NEXT: movdqu %xmm1, (%eax)
; X32-NEXT: retl
;
-; X64-LABEL: insertps_pr20411:
-; X64: ## BB#0:
-; X64-NEXT: pshufd {{.*#+}} xmm0 = mem[2,3,0,1]
-; X64-NEXT: pshufd {{.*#+}} xmm1 = mem[3,1,2,3]
-; X64-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
-; X64-NEXT: movdqu %xmm1, (%rdi)
+; X64-LABEL: insertps_pr20411:
+; X64: ## BB#0:
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; X64-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
+; X64-NEXT: movdqu %xmm1, (%rdi)
; X64-NEXT: retq
- %gather_load = shufflevector <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- %shuffle109 = shufflevector <4 x i32> <i32 4, i32 5, i32 6, i32 7>, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; 4 5 6 7
- %shuffle116 = shufflevector <8 x i32> %gather_load, <8 x i32> undef, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef> ; 3 x x x
- %shuffle117 = shufflevector <4 x i32> %shuffle109, <4 x i32> %shuffle116, <4 x i32> <i32 4, i32 3, i32 undef, i32 undef> ; 3 7 x x
- %ptrcast = bitcast i32* %RET to <4 x i32>*
- store <4 x i32> %shuffle117, <4 x i32>* %ptrcast, align 4
+ %shuffle117 = shufflevector <4 x i32> %shuffle109, <4 x i32> %shuffle116, <4 x i32> <i32 0, i32 7, i32 undef, i32 undef>
+ %ptrcast = bitcast i32* %RET to <4 x i32>*
+ store <4 x i32> %shuffle117, <4 x i32>* %ptrcast, align 4
ret void
}
Modified: llvm/trunk/test/CodeGen/X86/vec_insert-5.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_insert-5.ll?rev=234004&r1=234003&r2=234004&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_insert-5.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_insert-5.ll Fri Apr 3 05:02:21 2015
@@ -8,7 +8,7 @@ define void @t1(i32 %a, x86_mmx* %P) no
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: shll $12, %ecx
; CHECK-NEXT: movd %ecx, %xmm0
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,0,1]
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
; CHECK-NEXT: movlpd %xmm0, (%eax)
; CHECK-NEXT: retl
%tmp12 = shl i32 %a, 12
Modified: llvm/trunk/test/CodeGen/X86/vec_insert-mmx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_insert-mmx.ll?rev=234004&r1=234003&r2=234004&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_insert-mmx.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_insert-mmx.ll Fri Apr 3 05:02:21 2015
@@ -6,7 +6,7 @@ define x86_mmx @t0(i32 %A) nounwind {
; X86-32-LABEL: t0:
; X86-32: ## BB#0:
; X86-32: movd {{[0-9]+}}(%esp), %xmm0
-; X86-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,0,1]
+; X86-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
; X86-32-NEXT: movlpd %xmm0, (%esp)
; X86-32-NEXT: movq (%esp), %mm0
; X86-32-NEXT: addl $12, %esp
Modified: llvm/trunk/test/CodeGen/X86/vec_zero_cse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_zero_cse.ll?rev=234004&r1=234003&r2=234004&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_zero_cse.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_zero_cse.ll Fri Apr 3 05:02:21 2015
@@ -17,7 +17,7 @@ define void @test1() {
define void @test2() {
;CHECK-LABEL: @test2
-;CHECK: pshufd
+;CHECK: pcmpeqd
store <1 x i64> < i64 -1 >, <1 x i64>* @M1
store <2 x i32> < i32 -1, i32 -1 >, <2 x i32>* @M2
ret void
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll?rev=234004&r1=234003&r2=234004&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll Fri Apr 3 05:02:21 2015
@@ -634,28 +634,16 @@ define <16 x i8> @PR20540(<8 x i8> %a) {
}
define <16 x i8> @shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
-; SSE2-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; SSE2: # BB#0:
-; SSE2-NEXT: movzbl %dil, %eax
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; SSSE3: # BB#0:
-; SSSE3-NEXT: movd %edi, %xmm0
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSSE3-NEXT: retq
-;
-; SSE41-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; SSE41: # BB#0:
-; SSE41-NEXT: movd %edi, %xmm0
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSE41-NEXT: retq
+; SSE-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; SSE: # BB#0:
+; SSE-NEXT: movzbl %dil, %eax
+; SSE-NEXT: movd %eax, %xmm0
+; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; AVX: # BB#0:
-; AVX-NEXT: vmovd %edi, %xmm0
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT: movzbl %dil, %eax
+; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 0
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -663,29 +651,18 @@ define <16 x i8> @shuffle_v16i8_16_zz_zz
}
define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
-; SSE2-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; SSE2: # BB#0:
-; SSE2-NEXT: movzbl %dil, %eax
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; SSSE3: # BB#0:
-; SSSE3-NEXT: movd %edi, %xmm0
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSSE3-NEXT: retq
-;
-; SSE41-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; SSE41: # BB#0:
-; SSE41-NEXT: movd %edi, %xmm0
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSE41-NEXT: retq
-;
+; SSE-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; SSE: # BB#0:
+; SSE-NEXT: shll $8, %edi
+; SSE-NEXT: pxor %xmm0, %xmm0
+; SSE-NEXT: pinsrw $2, %edi, %xmm0
+; SSE-NEXT: retq
+
; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; AVX: # BB#0:
-; AVX-NEXT: vmovd %edi, %xmm0
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT: shll $8, %edi
+; AVX-NEXT: vpxor %xmm0, %xmm0
+; AVX-NEXT: vpinsrw $2, %edi, %xmm0
; AVX-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 0
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -695,14 +672,16 @@ define <16 x i8> @shuffle_v16i8_zz_zz_zz
define <16 x i8> @shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16(i8 %i) {
; SSE-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
; SSE: # BB#0:
-; SSE-NEXT: movd %edi, %xmm0
-; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
+; SSE-NEXT: shll $8, %edi
+; SSE-NEXT: pxor %xmm0, %xmm0
+; SSE-NEXT: pinsrw $7, %edi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
; AVX: # BB#0:
-; AVX-NEXT: vmovd %edi, %xmm0
-; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
+; AVX-NEXT: shll $8, %edi
+; AVX-NEXT: vpxor %xmm0, %xmm0
+; AVX-NEXT: vpinsrw $7, %edi, %xmm0
; AVX-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 0
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16>
@@ -710,32 +689,18 @@ define <16 x i8> @shuffle_v16i8_zz_uu_uu
}
define <16 x i8> @shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
-; SSE2-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; SSE2: # BB#0:
-; SSE2-NEXT: movzbl %dil, %eax
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; SSSE3: # BB#0:
-; SSSE3-NEXT: movd %edi, %xmm0
-; SSSE3-NEXT: pslld $24, %xmm0
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSSE3-NEXT: retq
-;
-; SSE41-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; SSE41: # BB#0:
-; SSE41-NEXT: movd %edi, %xmm0
-; SSE41-NEXT: pslld $24, %xmm0
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSE41-NEXT: retq
+; SSE-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; SSE: # BB#0:
+; SSE-NEXT: movzbl %dil, %eax
+; SSE-NEXT: pxor %xmm0, %xmm0
+; SSE-NEXT: pinsrw $1, %eax, %xmm0
+; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; AVX: # BB#0:
-; AVX-NEXT: vmovd %edi, %xmm0
-; AVX-NEXT: vpslld $24, %xmm0, %xmm0
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT: movzbl %dil, %eax
+; AVX-NEXT: vpxor %xmm0, %xmm0
+; AVX-NEXT: vpinsrw $1, %eax, %xmm0
; AVX-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 3
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 19, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll?rev=234004&r1=234003&r2=234004&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll Fri Apr 3 05:02:21 2015
@@ -1384,16 +1384,14 @@ define <8 x i16> @shuffle_v8i16_8zzzzzzz
define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) {
; SSE-LABEL: shuffle_v8i16_z8zzzzzz:
; SSE: # BB#0:
-; SSE-NEXT: movzwl %di, %eax
-; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; SSE-NEXT: pxor %xmm0, %xmm0
+; SSE-NEXT: pinsrw $1, %edi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_z8zzzzzz:
; AVX: # BB#0:
-; AVX-NEXT: movzwl %di, %eax
-; AVX-NEXT: vmovd %eax, %xmm0
-; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; AVX-NEXT: vpxor %xmm0, %xmm0
+; AVX-NEXT: vpinsrw $1, %edi, %xmm0
; AVX-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 0
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3>
@@ -1403,16 +1401,14 @@ define <8 x i16> @shuffle_v8i16_z8zzzzzz
define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) {
; SSE-LABEL: shuffle_v8i16_zzzzz8zz:
; SSE: # BB#0:
-; SSE-NEXT: movzwl %di, %eax
-; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
+; SSE-NEXT: pxor %xmm0, %xmm0
+; SSE-NEXT: pinsrw $5, %edi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_zzzzz8zz:
; AVX: # BB#0:
-; AVX-NEXT: movzwl %di, %eax
-; AVX-NEXT: vmovd %eax, %xmm0
-; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
+; AVX-NEXT: vpxor %xmm0, %xmm0
+; AVX-NEXT: vpinsrw $5, %edi, %xmm0
; AVX-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 0
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0>
@@ -1422,14 +1418,14 @@ define <8 x i16> @shuffle_v8i16_zzzzz8zz
define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) {
; SSE-LABEL: shuffle_v8i16_zuuzuuz8:
; SSE: # BB#0:
-; SSE-NEXT: movd %edi, %xmm0
-; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
+; SSE-NEXT: pxor %xmm0, %xmm0
+; SSE-NEXT: pinsrw $7, %edi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_zuuzuuz8:
; AVX: # BB#0:
-; AVX-NEXT: vmovd %edi, %xmm0
-; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
+; AVX-NEXT: vpxor %xmm0, %xmm0
+; AVX-NEXT: vpinsrw $7, %edi, %xmm0
; AVX-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 0
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8>
@@ -1439,16 +1435,14 @@ define <8 x i16> @shuffle_v8i16_zuuzuuz8
define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) {
; SSE-LABEL: shuffle_v8i16_zzBzzzzz:
; SSE: # BB#0:
-; SSE-NEXT: movzwl %di, %eax
-; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
+; SSE-NEXT: pxor %xmm0, %xmm0
+; SSE-NEXT: pinsrw $2, %edi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_zzBzzzzz:
; AVX: # BB#0:
-; AVX-NEXT: movzwl %di, %eax
-; AVX-NEXT: vmovd %eax, %xmm0
-; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
+; AVX-NEXT: vpxor %xmm0, %xmm0
+; AVX-NEXT: vpinsrw $2, %edi, %xmm0
; AVX-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 3
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7>
More information about the llvm-commits
mailing list