[llvm] r313507 - [X86] Teach execution domain fixing to convert between VPERMILPS and VPSHUFD.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 17 20:29:47 PDT 2017
Author: ctopper
Date: Sun Sep 17 20:29:47 2017
New Revision: 313507
URL: http://llvm.org/viewvc/llvm-project?rev=313507&view=rev
Log:
[X86] Teach execution domain fixing to convert between VPERMILPS and VPSHUFD.
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/test/CodeGen/X86/2012-01-12-extract-sv.ll
llvm/trunk/test/CodeGen/X86/SwizzleShuff.ll
llvm/trunk/test/CodeGen/X86/avx-basic.ll
llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll
llvm/trunk/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
llvm/trunk/test/CodeGen/X86/avx512-shuffles/shuffle.ll
llvm/trunk/test/CodeGen/X86/avx512-trunc.ll
llvm/trunk/test/CodeGen/X86/extract-store.ll
llvm/trunk/test/CodeGen/X86/i64-to-float.ll
llvm/trunk/test/CodeGen/X86/known-bits-vector.ll
llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
llvm/trunk/test/CodeGen/X86/masked_memop.ll
llvm/trunk/test/CodeGen/X86/palignr.ll
llvm/trunk/test/CodeGen/X86/shuffle-of-splat-multiuses.ll
llvm/trunk/test/CodeGen/X86/shuffle-strided-with-offset-128.ll
llvm/trunk/test/CodeGen/X86/shuffle-vs-trunc-128.ll
llvm/trunk/test/CodeGen/X86/split-extend-vector-inreg.ll
llvm/trunk/test/CodeGen/X86/stack-folding-int-avx1.ll
llvm/trunk/test/CodeGen/X86/stack-folding-int-avx2.ll
llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll
llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll
llvm/trunk/test/CodeGen/X86/swizzle-avx2.ll
llvm/trunk/test/CodeGen/X86/vector-compare-all_of.ll
llvm/trunk/test/CodeGen/X86/vector-compare-any_of.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll
llvm/trunk/test/CodeGen/X86/widened-broadcast.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Sun Sep 17 20:29:47 2017
@@ -9405,6 +9405,14 @@ static const uint16_t ReplaceableInstrs[
{ X86::VEXTRACTF32x4Z256mr,X86::VEXTRACTF32x4Z256mr,X86::VEXTRACTI32x4Z256mr },
{ X86::VEXTRACTF64x2Z256rr,X86::VEXTRACTF64x2Z256rr,X86::VEXTRACTI64x2Z256rr },
{ X86::VEXTRACTF64x2Z256mr,X86::VEXTRACTF64x2Z256mr,X86::VEXTRACTI64x2Z256mr },
+ { X86::VPERMILPSmi, X86::VPERMILPSmi, X86::VPSHUFDmi },
+ { X86::VPERMILPSri, X86::VPERMILPSri, X86::VPSHUFDri },
+ { X86::VPERMILPSZ128mi, X86::VPERMILPSZ128mi, X86::VPSHUFDZ128mi },
+ { X86::VPERMILPSZ128ri, X86::VPERMILPSZ128ri, X86::VPSHUFDZ128ri },
+ { X86::VPERMILPSZ256mi, X86::VPERMILPSZ256mi, X86::VPSHUFDZ256mi },
+ { X86::VPERMILPSZ256ri, X86::VPERMILPSZ256ri, X86::VPSHUFDZ256ri },
+ { X86::VPERMILPSZmi, X86::VPERMILPSZmi, X86::VPSHUFDZmi },
+ { X86::VPERMILPSZri, X86::VPERMILPSZri, X86::VPSHUFDZri },
};
static const uint16_t ReplaceableInstrsAVX2[][3] = {
@@ -9430,6 +9438,8 @@ static const uint16_t ReplaceableInstrsA
{ X86::VBLENDPSrmi, X86::VBLENDPSrmi, X86::VPBLENDDrmi },
{ X86::VBLENDPSYrri, X86::VBLENDPSYrri, X86::VPBLENDDYrri },
{ X86::VBLENDPSYrmi, X86::VBLENDPSYrmi, X86::VPBLENDDYrmi },
+ { X86::VPERMILPSYmi, X86::VPERMILPSYmi, X86::VPSHUFDYmi },
+ { X86::VPERMILPSYri, X86::VPERMILPSYri, X86::VPSHUFDYri },
};
static const uint16_t ReplaceableInstrsAVX2InsertExtract[][3] = {
Modified: llvm/trunk/test/CodeGen/X86/2012-01-12-extract-sv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2012-01-12-extract-sv.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2012-01-12-extract-sv.ll (original)
+++ llvm/trunk/test/CodeGen/X86/2012-01-12-extract-sv.ll Sun Sep 17 20:29:47 2017
@@ -5,8 +5,8 @@ define void @endless_loop() {
; CHECK-NEXT: # BB#0:
; CHECK-NEXT: vmovaps (%eax), %ymm0
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
-; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; CHECK-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,1,0,1]
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3,4,5,6],ymm1[7]
Modified: llvm/trunk/test/CodeGen/X86/SwizzleShuff.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/SwizzleShuff.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/SwizzleShuff.ll (original)
+++ llvm/trunk/test/CodeGen/X86/SwizzleShuff.ll Sun Sep 17 20:29:47 2017
@@ -66,7 +66,7 @@ define <4 x i32> @reverse_1(<4 x i32>* %
define <4 x i32> @no_reverse_shuff(<4 x i32>* %pA, <4 x i32>* %pB) {
; CHECK-LABEL: no_reverse_shuff:
; CHECK: # BB#0:
-; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[2,3,2,3]
; CHECK-NEXT: retq
%A = load <4 x i32>, <4 x i32>* %pA
%B = load <4 x i32>, <4 x i32>* %pB
Modified: llvm/trunk/test/CodeGen/X86/avx-basic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-basic.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-basic.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-basic.ll Sun Sep 17 20:29:47 2017
@@ -80,8 +80,8 @@ define <4 x i64> @ISelCrash(<4 x i64> %a
define <8 x i32> @VMOVZQI2PQI([0 x float]* nocapture %aFOO) nounwind {
; CHECK-LABEL: VMOVZQI2PQI:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%ptrcast.i33.i = bitcast [0 x float]* %aFOO to i32*
Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll Sun Sep 17 20:29:47 2017
@@ -2399,8 +2399,8 @@ define <4 x i64> @test_mm256_set1_epi16(
define <4 x i64> @test_mm256_set1_epi32(i32 %a0) nounwind {
; X32-LABEL: test_mm256_set1_epi32:
; X32: # BB#0:
-; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT: retl
;
Modified: llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll Sun Sep 17 20:29:47 2017
@@ -114,7 +114,7 @@ define <8 x i32> @B3(i32* %ptr, i32* %pt
; X32-NEXT: movl (%ecx), %ecx
; X32-NEXT: vmovd %ecx, %xmm0
; X32-NEXT: movl %ecx, (%eax)
-; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT: retl
;
@@ -123,7 +123,7 @@ define <8 x i32> @B3(i32* %ptr, i32* %pt
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: vmovd %eax, %xmm0
; X64-NEXT: movl %eax, (%rsi)
-; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT: retq
entry:
@@ -386,12 +386,12 @@ define <4 x i32> @load_splat_4i32_4i32_1
; X32-LABEL: load_splat_4i32_4i32_1111:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: vpshufd {{.*#+}} xmm0 = mem[1,1,1,1]
+; X32-NEXT: vpermilps {{.*#+}} xmm0 = mem[1,1,1,1]
; X32-NEXT: retl
;
; X64-LABEL: load_splat_4i32_4i32_1111:
; X64: ## BB#0: ## %entry
-; X64-NEXT: vpshufd {{.*#+}} xmm0 = mem[1,1,1,1]
+; X64-NEXT: vpermilps {{.*#+}} xmm0 = mem[1,1,1,1]
; X64-NEXT: retq
entry:
%ld = load <4 x i32>, <4 x i32>* %ptr
@@ -488,12 +488,12 @@ define <2 x i64> @load_splat_2i64_2i64_1
; X32-LABEL: load_splat_2i64_2i64_1111:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
+; X32-NEXT: vpermilps {{.*#+}} xmm0 = mem[2,3,2,3]
; X32-NEXT: retl
;
; X64-LABEL: load_splat_2i64_2i64_1111:
; X64: ## BB#0: ## %entry
-; X64-NEXT: vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
+; X64-NEXT: vpermilps {{.*#+}} xmm0 = mem[2,3,2,3]
; X64-NEXT: retq
entry:
%ld = load <2 x i64>, <2 x i64>* %ptr
@@ -602,8 +602,8 @@ define <2 x i64> @G(i64* %ptr) nounwind
;
; X64-LABEL: G:
; X64: ## BB#0: ## %entry
-; X64-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X64-NEXT: retq
entry:
%q = load i64, i64* %ptr, align 8
@@ -645,12 +645,12 @@ entry:
define <4 x i32> @H(<4 x i32> %a) {
; X32-LABEL: H:
; X32: ## BB#0: ## %entry
-; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X32-NEXT: retl
;
; X64-LABEL: H:
; X64: ## BB#0: ## %entry
-; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X64-NEXT: retq
entry:
%x = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
Modified: llvm/trunk/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll Sun Sep 17 20:29:47 2017
@@ -2536,12 +2536,12 @@ declare <4 x i64> @llvm.x86.avx2.psad.bw
define <4 x i64> @test_mm256_shuffle_epi32(<4 x i64> %a0) {
; X32-LABEL: test_mm256_shuffle_epi32:
; X32: # BB#0:
-; X32-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,0,0,7,7,4,4]
+; X32-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,0,0,7,7,4,4]
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_shuffle_epi32:
; X64: # BB#0:
-; X64-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,0,0,7,7,4,4]
+; X64-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,0,0,7,7,4,4]
; X64-NEXT: retq
%arg0 = bitcast <4 x i64> %a0 to <8 x i32>
%shuf = shufflevector <8 x i32> %arg0, <8 x i32> undef, <8 x i32> <i32 3, i32 3, i32 0, i32 0, i32 7, i32 7, i32 4, i32 4>
Modified: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll Sun Sep 17 20:29:47 2017
@@ -3716,33 +3716,33 @@ define <8 x i32> @test_pshufd(<8 x i32>
; GENERIC: # BB#0:
; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [5:1.00]
-; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pshufd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
; HASWELL-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [1:1.00]
-; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
+; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; SKYLAKE-LABEL: test_pshufd:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
; SKYLAKE-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [1:1.00]
-; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
+; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_pshufd:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:0.50]
; ZNVER1-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:0.25]
-; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
+; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
%2 = load <8 x i32>, <8 x i32> *%a1, align 32
%3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
- %4 = or <8 x i32> %1, %3
+ %4 = add <8 x i32> %1, %3
ret <8 x i32> %4
}
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll Sun Sep 17 20:29:47 2017
@@ -586,12 +586,12 @@ define <8 x double> @test_mm512_maskz_pe
define <8 x i64> @test_mm512_shuffle_epi32(<8 x i64> %a0) {
; X32-LABEL: test_mm512_shuffle_epi32:
; X32: # BB#0:
-; X32-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
+; X32-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_shuffle_epi32:
; X64: # BB#0:
-; X64-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
+; X64-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
; X64-NEXT: retq
%arg0 = bitcast <8 x i64> %a0 to <16 x i32>
%res0 = shufflevector <16 x i32> %arg0, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
Modified: llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-shuffles/partial_permute.ll Sun Sep 17 20:29:47 2017
@@ -963,9 +963,9 @@ define <8 x i16> @test_masked_z_32xi16_t
define <4 x i32> @test_8xi32_to_4xi32_perm_mask0(<8 x i32> %vec) {
; CHECK-LABEL: test_8xi32_to_4xi32_perm_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
-; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,2]
-; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,3,2]
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = shufflevector <8 x i32> %vec, <8 x i32> undef, <4 x i32> <i32 4, i32 0, i32 3, i32 2>
@@ -1067,9 +1067,9 @@ define <4 x i32> @test_masked_z_8xi32_to
define <4 x i32> @test_8xi32_to_4xi32_perm_mask3(<8 x i32> %vec) {
; CHECK-LABEL: test_8xi32_to_4xi32_perm_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
-; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
-; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,1]
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,3,2,1]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = shufflevector <8 x i32> %vec, <8 x i32> undef, <4 x i32> <i32 5, i32 3, i32 2, i32 5>
@@ -1222,11 +1222,11 @@ define <4 x i32> @test_masked_z_8xi32_to
define <4 x i32> @test_8xi32_to_4xi32_perm_mem_mask3(<8 x i32>* %vp) {
; CHECK-LABEL: test_8xi32_to_4xi32_perm_mem_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovdqa (%rdi), %ymm0
-; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
-; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
-; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
+; CHECK-NEXT: vmovaps (%rdi), %ymm0
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%vec = load <8 x i32>, <8 x i32>* %vp
@@ -3521,7 +3521,7 @@ define <8 x float> @test_masked_z_16xflo
define <4 x float> @test_16xfloat_to_4xfloat_perm_mem_mask0(<16 x float>* %vp) {
; CHECK-LABEL: test_16xfloat_to_4xfloat_perm_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps (%rdi), %zmm0
+; CHECK-NEXT: vmovapd (%rdi), %zmm0
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,3,3]
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0
@@ -3536,7 +3536,7 @@ define <4 x float> @test_16xfloat_to_4xf
define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mem_mask0(<16 x float>* %vp, <4 x float> %vec2) {
; CHECK-LABEL: test_masked_16xfloat_to_4xfloat_perm_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps (%rdi), %zmm1
+; CHECK-NEXT: vmovapd (%rdi), %zmm1
; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm2
; CHECK-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,2,3,3]
; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm1
@@ -3556,7 +3556,7 @@ define <4 x float> @test_masked_16xfloat
define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mem_mask0(<16 x float>* %vp) {
; CHECK-LABEL: test_masked_z_16xfloat_to_4xfloat_perm_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps (%rdi), %zmm0
+; CHECK-NEXT: vmovapd (%rdi), %zmm0
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,3,3]
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0
Modified: llvm/trunk/test/CodeGen/X86/avx512-shuffles/shuffle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-shuffles/shuffle.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-shuffles/shuffle.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-shuffles/shuffle.ll Sun Sep 17 20:29:47 2017
@@ -2091,7 +2091,7 @@ define <32 x i16> @test_masked_z_32xi16_
define <4 x i32> @test_4xi32_perm_mask0(<4 x i32> %vec) {
; CHECK-LABEL: test_4xi32_perm_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,3,0]
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,3,0]
; CHECK-NEXT: retq
%res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 3, i32 0>
ret <4 x i32> %res
@@ -2171,7 +2171,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
define <4 x i32> @test_4xi32_perm_mask3(<4 x i32> %vec) {
; CHECK-LABEL: test_4xi32_perm_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,0,3]
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,0,3]
; CHECK-NEXT: retq
%res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 3>
ret <4 x i32> %res
@@ -2203,7 +2203,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
define <4 x i32> @test_4xi32_perm_mem_mask0(<4 x i32>* %vp) {
; CHECK-LABEL: test_4xi32_perm_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,1,3,3]
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,3,3]
; CHECK-NEXT: retq
%vec = load <4 x i32>, <4 x i32>* %vp
%res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3>
@@ -2290,7 +2290,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
define <4 x i32> @test_4xi32_perm_mem_mask3(<4 x i32>* %vp) {
; CHECK-LABEL: test_4xi32_perm_mem_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = mem[1,0,1,0]
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[1,0,1,0]
; CHECK-NEXT: retq
%vec = load <4 x i32>, <4 x i32>* %vp
%res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
@@ -2325,7 +2325,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
define <8 x i32> @test_8xi32_perm_mask0(<8 x i32> %vec) {
; CHECK-LABEL: test_8xi32_perm_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,1,0,6,7,5,4]
+; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,1,0,6,7,5,4]
; CHECK-NEXT: retq
%res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 1, i32 0, i32 6, i32 7, i32 5, i32 4>
ret <8 x i32> %res
@@ -2405,7 +2405,7 @@ define <8 x i32> @test_masked_z_8xi32_pe
define <8 x i32> @test_8xi32_perm_mask3(<8 x i32> %vec) {
; CHECK-LABEL: test_8xi32_perm_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,3,1,0,5,7,5,4]
+; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,3,1,0,5,7,5,4]
; CHECK-NEXT: retq
%res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 0, i32 5, i32 7, i32 5, i32 4>
ret <8 x i32> %res
@@ -2437,7 +2437,7 @@ define <8 x i32> @test_masked_z_8xi32_pe
define <8 x i32> @test_8xi32_perm_mem_mask0(<8 x i32>* %vp) {
; CHECK-LABEL: test_8xi32_perm_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: vpshufd {{.*#+}} ymm0 = mem[1,0,2,0,5,4,6,4]
+; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = mem[1,0,2,0,5,4,6,4]
; CHECK-NEXT: retq
%vec = load <8 x i32>, <8 x i32>* %vp
%res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 5, i32 4, i32 6, i32 4>
@@ -2524,7 +2524,7 @@ define <8 x i32> @test_masked_z_8xi32_pe
define <8 x i32> @test_8xi32_perm_mem_mask3(<8 x i32>* %vp) {
; CHECK-LABEL: test_8xi32_perm_mem_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: vpshufd {{.*#+}} ymm0 = mem[3,2,0,0,7,6,4,4]
+; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = mem[3,2,0,0,7,6,4,4]
; CHECK-NEXT: retq
%vec = load <8 x i32>, <8 x i32>* %vp
%res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 0, i32 0, i32 7, i32 6, i32 4, i32 4>
@@ -2559,7 +2559,7 @@ define <8 x i32> @test_masked_z_8xi32_pe
define <16 x i32> @test_16xi32_perm_mask0(<16 x i32> %vec) {
; CHECK-LABEL: test_16xi32_perm_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12]
+; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12]
; CHECK-NEXT: retq
%res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 0, i32 7, i32 5, i32 7, i32 4, i32 11, i32 9, i32 11, i32 8, i32 15, i32 13, i32 15, i32 12>
ret <16 x i32> %res
@@ -2639,7 +2639,7 @@ define <16 x i32> @test_masked_z_16xi32_
define <16 x i32> @test_16xi32_perm_mask3(<16 x i32> %vec) {
; CHECK-LABEL: test_16xi32_perm_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15]
+; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15]
; CHECK-NEXT: retq
%res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 2, i32 0, i32 3, i32 7, i32 6, i32 4, i32 7, i32 11, i32 10, i32 8, i32 11, i32 15, i32 14, i32 12, i32 15>
ret <16 x i32> %res
@@ -2671,7 +2671,7 @@ define <16 x i32> @test_masked_z_16xi32_
define <16 x i32> @test_16xi32_perm_mem_mask0(<16 x i32>* %vp) {
; CHECK-LABEL: test_16xi32_perm_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: vpshufd {{.*#+}} zmm0 = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15]
+; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15]
; CHECK-NEXT: retq
%vec = load <16 x i32>, <16 x i32>* %vp
%res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 1, i32 3, i32 5, i32 4, i32 5, i32 7, i32 9, i32 8, i32 9, i32 11, i32 13, i32 12, i32 13, i32 15>
@@ -2758,7 +2758,7 @@ define <16 x i32> @test_masked_z_16xi32_
define <16 x i32> @test_16xi32_perm_mem_mask3(<16 x i32>* %vp) {
; CHECK-LABEL: test_16xi32_perm_mem_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: vpshufd {{.*#+}} zmm0 = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13]
+; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13]
; CHECK-NEXT: retq
%vec = load <16 x i32>, <16 x i32>* %vp
%res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 1, i32 1, i32 7, i32 5, i32 5, i32 5, i32 11, i32 9, i32 9, i32 9, i32 15, i32 13, i32 13, i32 13>
Modified: llvm/trunk/test/CodeGen/X86/avx512-trunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-trunc.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-trunc.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-trunc.ll Sun Sep 17 20:29:47 2017
@@ -268,8 +268,8 @@ define <2 x i32> @trunc_qd_128(<2 x i64>
define void @trunc_qd_128_mem(<2 x i64> %i, <2 x i32>* %res) #0 {
; KNL-LABEL: trunc_qd_128_mem:
; KNL: ## BB#0:
-; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; KNL-NEXT: vmovq %xmm0, (%rdi)
+; KNL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; KNL-NEXT: vmovlps %xmm0, (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: trunc_qd_128_mem:
Modified: llvm/trunk/test/CodeGen/X86/extract-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/extract-store.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/extract-store.ll (original)
+++ llvm/trunk/test/CodeGen/X86/extract-store.ll Sun Sep 17 20:29:47 2017
@@ -363,8 +363,8 @@ define void @extract_i64_1(i64* nocaptur
; AVX-X32-LABEL: extract_i64_1:
; AVX-X32: # BB#0:
; AVX-X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX-X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX-X32-NEXT: vmovq %xmm0, (%eax)
+; AVX-X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX-X32-NEXT: vmovlps %xmm0, (%eax)
; AVX-X32-NEXT: retl
;
; AVX-X64-LABEL: extract_i64_1:
Modified: llvm/trunk/test/CodeGen/X86/i64-to-float.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/i64-to-float.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/i64-to-float.ll (original)
+++ llvm/trunk/test/CodeGen/X86/i64-to-float.ll Sun Sep 17 20:29:47 2017
@@ -207,8 +207,8 @@ define <2 x double> @clamp_sitofp_2i64_2
; X32-AVX-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
; X32-AVX-NEXT: vmovq {{.*#+}} xmm1 = xmm0[0],zero
; X32-AVX-NEXT: vmovq %xmm1, {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; X32-AVX-NEXT: vmovq %xmm0, {{[0-9]+}}(%esp)
+; X32-AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; X32-AVX-NEXT: vmovlpd %xmm0, {{[0-9]+}}(%esp)
; X32-AVX-NEXT: fildll {{[0-9]+}}(%esp)
; X32-AVX-NEXT: fstpl {{[0-9]+}}(%esp)
; X32-AVX-NEXT: fildll {{[0-9]+}}(%esp)
Modified: llvm/trunk/test/CodeGen/X86/known-bits-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/known-bits-vector.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/known-bits-vector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/known-bits-vector.ll Sun Sep 17 20:29:47 2017
@@ -140,15 +140,15 @@ define <4 x i32> @knownbits_mask_shuffle
define <4 x float> @knownbits_mask_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_uitofp:
; X32: # BB#0:
-; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; X32-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0
+; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X32-NEXT: vcvtdq2ps %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_shuffle_uitofp:
; X64: # BB#0:
-; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; X64-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
%1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
@@ -160,17 +160,17 @@ define <4 x float> @knownbits_mask_shuff
define <4 x float> @knownbits_mask_or_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_or_shuffle_uitofp:
; X32: # BB#0:
-; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-NEXT: vpor {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; X32-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0
+; X32-NEXT: vorps {{\.LCPI.*}}, %xmm0, %xmm0
+; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X32-NEXT: vcvtdq2ps %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_or_shuffle_uitofp:
; X64: # BB#0:
-; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; X64-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
-; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; X64-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vorps {{.*}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
%1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
@@ -183,17 +183,17 @@ define <4 x float> @knownbits_mask_or_sh
define <4 x float> @knownbits_mask_xor_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_xor_shuffle_uitofp:
; X32: # BB#0:
-; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-NEXT: vpxor {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; X32-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0
+; X32-NEXT: vxorps {{\.LCPI.*}}, %xmm0, %xmm0
+; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X32-NEXT: vcvtdq2ps %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_xor_shuffle_uitofp:
; X64: # BB#0:
-; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; X64-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
-; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; X64-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vxorps {{.*}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
%1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
@@ -394,20 +394,20 @@ declare <4 x i32> @llvm.bswap.v4i32(<4 x
define <8 x float> @knownbits_mask_concat_uitofp(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_concat_uitofp:
; X32: # BB#0:
-; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-NEXT: vpand {{\.LCPI.*}}, %xmm1, %xmm1
-; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
-; X32-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,3,1,3]
+; X32-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0
+; X32-NEXT: vandps {{\.LCPI.*}}, %xmm1, %xmm1
+; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,2]
+; X32-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[1,3,1,3]
; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-NEXT: vcvtdq2ps %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_concat_uitofp:
; X64: # BB#0:
-; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; X64-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
-; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
-; X64-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,3,1,3]
+; X64-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vandps {{.*}}(%rip), %xmm1, %xmm1
+; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,2]
+; X64-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[1,3,1,3]
; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
; X64-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll Sun Sep 17 20:29:47 2017
@@ -1066,7 +1066,7 @@ define void @test20(<2 x float>%a1, <2 x
;
; SKX_32-LABEL: test20:
; SKX_32: # BB#0:
-; SKX_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SKX_32-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SKX_32-NEXT: vpsllq $63, %xmm2, %xmm2
; SKX_32-NEXT: vptestmq %xmm2, %xmm2, %k0
; SKX_32-NEXT: kshiftlb $6, %k0, %k0
@@ -1163,7 +1163,7 @@ define <2 x float> @test22(float* %base,
;
; SKX-LABEL: test22:
; SKX: # BB#0:
-; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX-NEXT: vptestmq %xmm1, %xmm1, %k0
; SKX-NEXT: kshiftlb $6, %k0, %k0
@@ -1174,7 +1174,7 @@ define <2 x float> @test22(float* %base,
;
; SKX_32-LABEL: test22:
; SKX_32: # BB#0:
-; SKX_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SKX_32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k0
; SKX_32-NEXT: kshiftlb $6, %k0, %k0
@@ -1454,7 +1454,7 @@ define <2 x float> @test27(float* %base,
;
; SKX-LABEL: test27:
; SKX: # BB#0:
-; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
+; SKX-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,2,2,3]
; SKX-NEXT: movb $3, %al
; SKX-NEXT: kmovw %eax, %k1
; SKX-NEXT: vgatherdps (%rdi,%xmm1,4), %xmm0 {%k1}
@@ -1462,7 +1462,7 @@ define <2 x float> @test27(float* %base,
;
; SKX_32-LABEL: test27:
; SKX_32: # BB#0:
-; SKX_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
+; SKX_32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,2,2,3]
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: movb $3, %cl
; SKX_32-NEXT: kmovw %ecx, %k1
Modified: llvm/trunk/test/CodeGen/X86/masked_memop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_memop.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_memop.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_memop.ll Sun Sep 17 20:29:47 2017
@@ -477,7 +477,7 @@ define void @test15(<2 x i32> %trigger,
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX1-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi)
; AVX1-NEXT: retq
;
@@ -564,7 +564,7 @@ define <2 x i32> @test17(<2 x i32> %trig
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; AVX1-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT: retq
@@ -576,7 +576,7 @@ define <2 x i32> @test17(<2 x i32> %trig
; AVX2-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; AVX2-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX2-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX2-NEXT: retq
@@ -588,7 +588,7 @@ define <2 x i32> @test17(<2 x i32> %trig
; AVX512F-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; AVX512F-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2
-; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; AVX512F-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX512F-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX512F-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX512F-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/palignr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/palignr.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/palignr.ll (original)
+++ llvm/trunk/test/CodeGen/X86/palignr.ll Sun Sep 17 20:29:47 2017
@@ -11,7 +11,7 @@ define <4 x i32> @test1(<4 x i32> %A, <4
;
; CHECK-AVX-LABEL: test1:
; CHECK-AVX: # BB#0:
-; CHECK-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,0]
+; CHECK-AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,0]
; CHECK-AVX-NEXT: retl
%C = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> < i32 1, i32 2, i32 3, i32 0 >
ret <4 x i32> %C
Modified: llvm/trunk/test/CodeGen/X86/shuffle-of-splat-multiuses.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/shuffle-of-splat-multiuses.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/shuffle-of-splat-multiuses.ll (original)
+++ llvm/trunk/test/CodeGen/X86/shuffle-of-splat-multiuses.ll Sun Sep 17 20:29:47 2017
@@ -42,7 +42,7 @@ define <8 x float> @foo8(<8 x float> %v,
define <4 x i32> @undef_splatmask(<4 x i32> %v) nounwind {
; AVX2-LABEL: undef_splatmask:
; AVX2: # BB#0:
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; AVX2-NEXT: retq
%res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 2, i32 undef>
%res1 = shufflevector <4 x i32> %res, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
@@ -52,7 +52,7 @@ define <4 x i32> @undef_splatmask(<4 x i
define <4 x i32> @undef_splatmask2(<4 x i32> %v) nounwind {
; AVX2-LABEL: undef_splatmask2:
; AVX2: # BB#0:
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; AVX2-NEXT: retq
%res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 2, i32 undef>
%res1 = shufflevector <4 x i32> %res, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
@@ -62,7 +62,7 @@ define <4 x i32> @undef_splatmask2(<4 x
define <4 x i32> @undef_splatmask3(<4 x i32> %v) nounwind {
; AVX2-LABEL: undef_splatmask3:
; AVX2: # BB#0:
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; AVX2-NEXT: retq
%res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 2, i32 undef>
%res1 = shufflevector <4 x i32> %res, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 3>
@@ -72,10 +72,10 @@ define <4 x i32> @undef_splatmask3(<4 x
define <4 x i32> @undef_splatmask4(<4 x i32> %v, <4 x i32>* %p) nounwind {
; AVX2-LABEL: undef_splatmask4:
; AVX2: # BB#0:
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; AVX2-NEXT: vmovdqa %xmm0, (%rdi)
-; AVX2-NEXT: vmovdqa %xmm1, %xmm0
+; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,2,3,3]
+; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; AVX2-NEXT: vmovaps %xmm0, (%rdi)
+; AVX2-NEXT: vmovaps %xmm1, %xmm0
; AVX2-NEXT: retq
%res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 2, i32 undef>
%res1 = shufflevector <4 x i32> %res, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
Modified: llvm/trunk/test/CodeGen/X86/shuffle-strided-with-offset-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/shuffle-strided-with-offset-128.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/shuffle-strided-with-offset-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/shuffle-strided-with-offset-128.ll Sun Sep 17 20:29:47 2017
@@ -139,14 +139,14 @@ define void @shuffle_v4i32_to_v2i32_1(<4
;
; AVX-LABEL: shuffle_v4i32_to_v2i32_1:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = mem[1,3,2,3]
-; AVX-NEXT: vmovq %xmm0, (%rsi)
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = mem[1,3,2,3]
+; AVX-NEXT: vmovlps %xmm0, (%rsi)
; AVX-NEXT: retq
;
; AVX512F-LABEL: shuffle_v4i32_to_v2i32_1:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = mem[1,3,2,3]
-; AVX512F-NEXT: vmovq %xmm0, (%rsi)
+; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = mem[1,3,2,3]
+; AVX512F-NEXT: vmovlps %xmm0, (%rsi)
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i32_to_v2i32_1:
@@ -157,8 +157,8 @@ define void @shuffle_v4i32_to_v2i32_1(<4
;
; AVX512BW-LABEL: shuffle_v4i32_to_v2i32_1:
; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = mem[1,3,2,3]
-; AVX512BW-NEXT: vmovq %xmm0, (%rsi)
+; AVX512BW-NEXT: vpermilps {{.*#+}} xmm0 = mem[1,3,2,3]
+; AVX512BW-NEXT: vmovlps %xmm0, (%rsi)
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: shuffle_v4i32_to_v2i32_1:
Modified: llvm/trunk/test/CodeGen/X86/shuffle-vs-trunc-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/shuffle-vs-trunc-128.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/shuffle-vs-trunc-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/shuffle-vs-trunc-128.ll Sun Sep 17 20:29:47 2017
@@ -241,14 +241,14 @@ define void @shuffle_v4i32_to_v2i32(<4 x
;
; AVX-LABEL: shuffle_v4i32_to_v2i32:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,2,2,3]
-; AVX-NEXT: vmovq %xmm0, (%rsi)
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,2,2,3]
+; AVX-NEXT: vmovlps %xmm0, (%rsi)
; AVX-NEXT: retq
;
; AVX512F-LABEL: shuffle_v4i32_to_v2i32:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,2,2,3]
-; AVX512F-NEXT: vmovq %xmm0, (%rsi)
+; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,2,2,3]
+; AVX512F-NEXT: vmovlps %xmm0, (%rsi)
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i32_to_v2i32:
@@ -259,8 +259,8 @@ define void @shuffle_v4i32_to_v2i32(<4 x
;
; AVX512BW-LABEL: shuffle_v4i32_to_v2i32:
; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,2,2,3]
-; AVX512BW-NEXT: vmovq %xmm0, (%rsi)
+; AVX512BW-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,2,2,3]
+; AVX512BW-NEXT: vmovlps %xmm0, (%rsi)
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: shuffle_v4i32_to_v2i32:
@@ -283,14 +283,14 @@ define void @trunc_v2i64_to_v2i32(<4 x i
;
; AVX-LABEL: trunc_v2i64_to_v2i32:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,2,2,3]
-; AVX-NEXT: vmovq %xmm0, (%rsi)
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,2,2,3]
+; AVX-NEXT: vmovlps %xmm0, (%rsi)
; AVX-NEXT: retq
;
; AVX512F-LABEL: trunc_v2i64_to_v2i32:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,2,2,3]
-; AVX512F-NEXT: vmovq %xmm0, (%rsi)
+; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,2,2,3]
+; AVX512F-NEXT: vmovlps %xmm0, (%rsi)
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_v2i64_to_v2i32:
@@ -301,8 +301,8 @@ define void @trunc_v2i64_to_v2i32(<4 x i
;
; AVX512BW-LABEL: trunc_v2i64_to_v2i32:
; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,2,2,3]
-; AVX512BW-NEXT: vmovq %xmm0, (%rsi)
+; AVX512BW-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,2,2,3]
+; AVX512BW-NEXT: vmovlps %xmm0, (%rsi)
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_v2i64_to_v2i32:
Modified: llvm/trunk/test/CodeGen/X86/split-extend-vector-inreg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/split-extend-vector-inreg.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/split-extend-vector-inreg.ll (original)
+++ llvm/trunk/test/CodeGen/X86/split-extend-vector-inreg.ll Sun Sep 17 20:29:47 2017
@@ -5,7 +5,7 @@
define <4 x i64> @autogen_SD88863() {
; X32-LABEL: autogen_SD88863:
; X32: # BB#0: # %BB
-; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X32-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3]
@@ -20,7 +20,7 @@ define <4 x i64> @autogen_SD88863() {
;
; X64-LABEL: autogen_SD88863:
; X64: # BB#0: # %BB
-; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3]
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-int-avx1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-int-avx1.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-int-avx1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-int-avx1.ll Sun Sep 17 20:29:47 2017
@@ -771,7 +771,9 @@ define <4 x i32> @stack_fold_pshufd(<4 x
;CHECK: vpshufd $27, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
- ret <4 x i32> %2
+ ; add forces execution domain
+ %3 = add <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %3
}
define <8 x i16> @stack_fold_pshufhw(<8 x i16> %a0) {
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-int-avx2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-int-avx2.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-int-avx2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-int-avx2.ll Sun Sep 17 20:29:47 2017
@@ -859,7 +859,9 @@ define <8 x i32> @stack_fold_pshufd(<8 x
;CHECK: vpshufd $27, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
- ret <8 x i32> %2
+ ; add forces execution domain
+ %3 = add <8 x i32> %2, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ret <8 x i32> %3
}
define <16 x i16> @stack_fold_vpshufhw(<16 x i16> %a0) {
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll Sun Sep 17 20:29:47 2017
@@ -1252,7 +1252,8 @@ define <16 x i32> @stack_fold_pshufd_zmm
;CHECK: vpshufd $27, {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
- ret <16 x i32> %2
+ %3 = add <16 x i32> %2, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ret <16 x i32> %3
}
define <16 x i32> @stack_fold_pshufd_zmm_mask(<16 x i32> %passthru, <16 x i32> %a0, i16 %mask) {
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll Sun Sep 17 20:29:47 2017
@@ -1693,7 +1693,8 @@ define <4 x i32> @stack_fold_pshufd(<4 x
;CHECK: vpshufd $27, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
- ret <4 x i32> %2
+ %3 = add <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %3
}
define <4 x i32> @stack_fold_pshufd_mask(<4 x i32> %passthru, <4 x i32> %a0, i8 %mask) {
@@ -1723,7 +1724,8 @@ define <8 x i32> @stack_fold_pshufd_ymm(
;CHECK: vpshufd $27, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
- ret <8 x i32> %2
+ %3 = add <8 x i32> %2, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ret <8 x i32> %3
}
define <8 x i32> @stack_fold_pshufd_ymm_mask(<8 x i32> %passthru, <8 x i32> %a0, i8 %mask) {
Modified: llvm/trunk/test/CodeGen/X86/swizzle-avx2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/swizzle-avx2.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/swizzle-avx2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/swizzle-avx2.ll Sun Sep 17 20:29:47 2017
@@ -25,7 +25,7 @@ define <8 x i32> @swizzle_1(<8 x i32> %v
define <8 x i32> @swizzle_2(<8 x i32> %v) {
; CHECK-LABEL: swizzle_2:
; CHECK: # BB#0:
-; CHECK-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
+; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; CHECK-NEXT: retq
%1 = shufflevector <8 x i32> %v, <8 x i32> undef, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 0, i32 1, i32 2, i32 3>
%2 = shufflevector <8 x i32> %1, <8 x i32> undef, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 0, i32 1, i32 2, i32 3>
@@ -35,7 +35,7 @@ define <8 x i32> @swizzle_2(<8 x i32> %v
define <8 x i32> @swizzle_3(<8 x i32> %v) {
; CHECK-LABEL: swizzle_3:
; CHECK: # BB#0:
-; CHECK-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
+; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; CHECK-NEXT: retq
%1 = shufflevector <8 x i32> %v, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 2, i32 3, i32 0, i32 1>
%2 = shufflevector <8 x i32> %1, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 2, i32 3, i32 0, i32 1>
Modified: llvm/trunk/test/CodeGen/X86/vector-compare-all_of.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-compare-all_of.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-compare-all_of.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-compare-all_of.ll Sun Sep 17 20:29:47 2017
@@ -685,9 +685,9 @@ define i16 @test_v16i16_sext(<16 x i16>
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
@@ -887,9 +887,9 @@ define i8 @test_v32i8_sext(<32 x i8> %a0
; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
Modified: llvm/trunk/test/CodeGen/X86/vector-compare-any_of.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-compare-any_of.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-compare-any_of.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-compare-any_of.ll Sun Sep 17 20:29:47 2017
@@ -635,9 +635,9 @@ define i16 @test_v16i16_sext(<16 x i16>
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
@@ -824,9 +824,9 @@ define i8 @test_v32i8_sext(<32 x i8> %a0
; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll Sun Sep 17 20:29:47 2017
@@ -15,7 +15,7 @@ define <2 x i64> @shuffle_v2i64_00(<2 x
;
; AVX1-LABEL: shuffle_v2i64_00:
; AVX1: # BB#0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2i64_00:
@@ -38,7 +38,7 @@ define <2 x i64> @shuffle_v2i64_10(<2 x
;
; AVX-LABEL: shuffle_v2i64_10:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
ret <2 x i64> %shuffle
@@ -51,7 +51,7 @@ define <2 x i64> @shuffle_v2i64_11(<2 x
;
; AVX-LABEL: shuffle_v2i64_11:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
ret <2 x i64> %shuffle
@@ -64,7 +64,7 @@ define <2 x i64> @shuffle_v2i64_22(<2 x
;
; AVX1-LABEL: shuffle_v2i64_22:
; AVX1: # BB#0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2i64_22:
@@ -87,7 +87,7 @@ define <2 x i64> @shuffle_v2i64_32(<2 x
;
; AVX-LABEL: shuffle_v2i64_32:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[2,3,0,1]
; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
ret <2 x i64> %shuffle
@@ -100,7 +100,7 @@ define <2 x i64> @shuffle_v2i64_33(<2 x
;
; AVX-LABEL: shuffle_v2i64_33:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[2,3,2,3]
; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
ret <2 x i64> %shuffle
@@ -1363,8 +1363,8 @@ define <2 x i64> @insert_dup_mem_v2i64(i
;
; AVX1-LABEL: insert_dup_mem_v2i64:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_dup_mem_v2i64:
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll Sun Sep 17 20:29:47 2017
@@ -15,7 +15,7 @@ define <4 x i32> @shuffle_v4i32_0001(<4
;
; AVX-LABEL: shuffle_v4i32_0001:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,1]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
ret <4 x i32> %shuffle
@@ -28,7 +28,7 @@ define <4 x i32> @shuffle_v4i32_0020(<4
;
; AVX-LABEL: shuffle_v4i32_0020:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,0]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,0]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
ret <4 x i32> %shuffle
@@ -41,7 +41,7 @@ define <4 x i32> @shuffle_v4i32_0112(<4
;
; AVX-LABEL: shuffle_v4i32_0112:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,2]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
ret <4 x i32> %shuffle
@@ -54,7 +54,7 @@ define <4 x i32> @shuffle_v4i32_0300(<4
;
; AVX-LABEL: shuffle_v4i32_0300:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,0,0]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,3,0,0]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
ret <4 x i32> %shuffle
@@ -67,7 +67,7 @@ define <4 x i32> @shuffle_v4i32_1000(<4
;
; AVX-LABEL: shuffle_v4i32_1000:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,0,0,0]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
ret <4 x i32> %shuffle
@@ -80,7 +80,7 @@ define <4 x i32> @shuffle_v4i32_2200(<4
;
; AVX-LABEL: shuffle_v4i32_2200:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,0,0]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,0,0]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
ret <4 x i32> %shuffle
@@ -93,7 +93,7 @@ define <4 x i32> @shuffle_v4i32_3330(<4
;
; AVX-LABEL: shuffle_v4i32_3330:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,0]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,0]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
ret <4 x i32> %shuffle
@@ -106,7 +106,7 @@ define <4 x i32> @shuffle_v4i32_3210(<4
;
; AVX-LABEL: shuffle_v4i32_3210:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x i32> %shuffle
@@ -120,7 +120,7 @@ define <4 x i32> @shuffle_v4i32_2121(<4
;
; AVX-LABEL: shuffle_v4i32_2121:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,1]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,2,1]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 1, i32 2, i32 1>
ret <4 x i32> %shuffle
@@ -447,9 +447,9 @@ define <4 x i32> @shuffle_v4i32_0412(<4
;
; AVX2OR512VL-LABEL: shuffle_v4i32_0412:
; AVX2OR512VL: # BB#0:
-; AVX2OR512VL-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
-; AVX2OR512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
+; AVX2OR512VL-NEXT: vbroadcastss %xmm1, %xmm1
+; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,2]
+; AVX2OR512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
ret <4 x i32> %shuffle
@@ -490,8 +490,8 @@ define <4 x i32> @shuffle_v4i32_4012(<4
;
; AVX2OR512VL-LABEL: shuffle_v4i32_4012:
; AVX2OR512VL: # BB#0:
-; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,2]
-; AVX2OR512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,2]
+; AVX2OR512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
ret <4 x i32> %shuffle
@@ -1249,7 +1249,7 @@ define <4 x i32> @shuffle_v4i32_z4zz(<4
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
+; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,0,1,1]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 2, i32 4, i32 3, i32 0>
ret <4 x i32> %shuffle
@@ -1295,7 +1295,7 @@ define <4 x i32> @shuffle_v4i32_zz4z(<4
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,0,1]
+; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,0,1]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 4, i32 0>
ret <4 x i32> %shuffle
@@ -1353,9 +1353,9 @@ define <4 x i32> @shuffle_v4i32_z6zz(<4
;
; AVX2OR512VL-LABEL: shuffle_v4i32_z6zz:
; AVX2OR512VL: # BB#0:
-; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
-; AVX2OR512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2OR512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
+; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; AVX2OR512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX2OR512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
ret <4 x i32> %shuffle
@@ -1824,7 +1824,7 @@ define <4 x float> @shuffle_v4f32_bitcas
;
; AVX512VL-LABEL: shuffle_v4f32_bitcast_4401:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT: retq
%1 = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll Sun Sep 17 20:29:47 2017
@@ -14,7 +14,7 @@ define <8 x i16> @shuffle_v8i16_01012323
;
; AVX-LABEL: shuffle_v8i16_01012323:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3>
ret <8 x i16> %shuffle
@@ -27,7 +27,7 @@ define <8 x i16> @shuffle_v8i16_67452301
;
; AVX-LABEL: shuffle_v8i16_67452301:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
ret <8 x i16> %shuffle
@@ -167,7 +167,7 @@ define <8 x i16> @shuffle_v8i16_23016745
;
; AVX-LABEL: shuffle_v8i16_23016745:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,0,3,2]
; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
ret <8 x i16> %shuffle
@@ -1119,7 +1119,7 @@ define <8 x i16> @shuffle_v8i16_XXXdXXXX
;
; AVX-LABEL: shuffle_v8i16_XXXdXXXX:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[2,2,3,3]
; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %shuffle
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll Sun Sep 17 20:29:47 2017
@@ -3243,15 +3243,10 @@ define <16 x i16> @shuffle_v16i16_00_03_
}
define <16 x i16> @shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
-; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm1[0,2,2,3,4,6,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu:
-; AVX2OR512VL: # BB#0:
-; AVX2OR512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm1[0,2,2,3,4,6,6,7]
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu:
+; ALL: # BB#0:
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm1[0,2,2,3,4,6,6,7]
+; ALL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 29, i32 undef, i32 undef, i32 undef, i32 undef>
ret <16 x i16> %shuffle
}
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll Sun Sep 17 20:29:47 2017
@@ -593,7 +593,7 @@ define <4 x i64> @shuffle_v4i64_0000(<4
define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0001:
; AVX1: # BB#0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
@@ -615,7 +615,7 @@ define <4 x i64> @shuffle_v4i64_0020(<4
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -677,8 +677,8 @@ define <4 x i64> @shuffle_v4i64_0300(<4
define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1000:
; AVX1: # BB#0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
@@ -983,14 +983,14 @@ define <4 x i64> @shuffle_v4i64_1054(<4
;
; AVX2-LABEL: shuffle_v4i64_1054:
; AVX2: # BB#0:
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
+; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_1054:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
+; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
ret <4 x i64> %shuffle
@@ -1005,14 +1005,14 @@ define <4 x i64> @shuffle_v4i64_3254(<4
;
; AVX2-LABEL: shuffle_v4i64_3254:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
-; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
+; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_3254:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
-; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
+; AVX512VL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; AVX512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
ret <4 x i64> %shuffle
@@ -1027,14 +1027,14 @@ define <4 x i64> @shuffle_v4i64_3276(<4
;
; AVX2-LABEL: shuffle_v4i64_3276:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
+; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_3276:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
+; AVX512VL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; AVX512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
ret <4 x i64> %shuffle
@@ -1049,14 +1049,14 @@ define <4 x i64> @shuffle_v4i64_1076(<4
;
; AVX2-LABEL: shuffle_v4i64_1076:
; AVX2: # BB#0:
-; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
+; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_1076:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
+; AVX512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6>
ret <4 x i64> %shuffle
@@ -1158,7 +1158,7 @@ define <4 x i64> @shuffle_v4i64_15uu(<4
define <4 x i64> @shuffle_v4i64_11uu(<4 x i64> %a, <4 x i64> %b) {
; ALL-LABEL: shuffle_v4i64_11uu:
; ALL: # BB#0:
-; ALL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; ALL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>
ret <4 x i64> %shuffle
@@ -1168,7 +1168,7 @@ define <4 x i64> @shuffle_v4i64_22uu(<4
; AVX1-LABEL: shuffle_v4i64_22uu:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_22uu:
@@ -1403,7 +1403,7 @@ define <4 x double> @splat128_mem_v4f64_
define <4 x double> @broadcast_v4f64_0000_from_v2i64(<2 x i64> %a0) {
; AVX1-LABEL: broadcast_v4f64_0000_from_v2i64:
; AVX1: # BB#0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll Sun Sep 17 20:29:47 2017
@@ -943,8 +943,8 @@ define <8 x i32> @shuffle_v8i32_00000000
define <8 x i32> @shuffle_v8i32_00000010(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00000010:
; AVX1: # BB#0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
@@ -960,8 +960,8 @@ define <8 x i32> @shuffle_v8i32_00000010
define <8 x i32> @shuffle_v8i32_00000200(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00000200:
; AVX1: # BB#0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,0]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
@@ -977,8 +977,8 @@ define <8 x i32> @shuffle_v8i32_00000200
define <8 x i32> @shuffle_v8i32_00003000(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00003000:
; AVX1: # BB#0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,0,0,0]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
@@ -1069,7 +1069,7 @@ define <8 x i32> @shuffle_v8i32_01014545
;
; AVX2OR512VL-LABEL: shuffle_v8i32_01014545:
; AVX2OR512VL: # BB#0:
-; AVX2OR512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
+; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
ret <8 x i32> %shuffle
@@ -1078,8 +1078,8 @@ define <8 x i32> @shuffle_v8i32_01014545
define <8 x i32> @shuffle_v8i32_00112233(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00112233:
; AVX1: # BB#0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
@@ -1095,8 +1095,8 @@ define <8 x i32> @shuffle_v8i32_00112233
define <8 x i32> @shuffle_v8i32_00001111(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00001111:
; AVX1: # BB#0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
@@ -1144,9 +1144,9 @@ define <8 x i32> @shuffle_v8i32_08084c4c
;
; AVX2OR512VL-LABEL: shuffle_v8i32_08084c4c:
; AVX2OR512VL: # BB#0:
-; AVX2OR512VL-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,0,4,4,6,4]
-; AVX2OR512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
-; AVX2OR512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,2,0,4,4,6,4]
+; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
+; AVX2OR512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
ret <8 x i32> %shuffle
@@ -1264,8 +1264,8 @@ define <8 x i32> @shuffle_v8i32_08991abb
define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_091b2d3f:
; AVX1: # BB#0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[0,1,1,3]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,3,3]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX1-NEXT: retq
@@ -1282,7 +1282,7 @@ define <8 x i32> @shuffle_v8i32_091b2d3f
define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_09ab1def:
; AVX1: # BB#0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX1-NEXT: retq
@@ -1298,113 +1298,73 @@ define <8 x i32> @shuffle_v8i32_09ab1def
}
define <8 x i32> @shuffle_v8i32_00014445(<8 x i32> %a, <8 x i32> %b) {
-; AVX1-LABEL: shuffle_v8i32_00014445:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v8i32_00014445:
-; AVX2OR512VL: # BB#0:
-; AVX2OR512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8i32_00014445:
+; ALL: # BB#0:
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
ret <8 x i32> %shuffle
}
define <8 x i32> @shuffle_v8i32_00204464(<8 x i32> %a, <8 x i32> %b) {
-; AVX1-LABEL: shuffle_v8i32_00204464:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v8i32_00204464:
-; AVX2OR512VL: # BB#0:
-; AVX2OR512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8i32_00204464:
+; ALL: # BB#0:
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
ret <8 x i32> %shuffle
}
define <8 x i32> @shuffle_v8i32_03004744(<8 x i32> %a, <8 x i32> %b) {
-; AVX1-LABEL: shuffle_v8i32_03004744:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v8i32_03004744:
-; AVX2OR512VL: # BB#0:
-; AVX2OR512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8i32_03004744:
+; ALL: # BB#0:
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
ret <8 x i32> %shuffle
}
define <8 x i32> @shuffle_v8i32_10005444(<8 x i32> %a, <8 x i32> %b) {
-; AVX1-LABEL: shuffle_v8i32_10005444:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v8i32_10005444:
-; AVX2OR512VL: # BB#0:
-; AVX2OR512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8i32_10005444:
+; ALL: # BB#0:
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
ret <8 x i32> %shuffle
}
define <8 x i32> @shuffle_v8i32_22006644(<8 x i32> %a, <8 x i32> %b) {
-; AVX1-LABEL: shuffle_v8i32_22006644:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v8i32_22006644:
-; AVX2OR512VL: # BB#0:
-; AVX2OR512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8i32_22006644:
+; ALL: # BB#0:
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
ret <8 x i32> %shuffle
}
define <8 x i32> @shuffle_v8i32_33307774(<8 x i32> %a, <8 x i32> %b) {
-; AVX1-LABEL: shuffle_v8i32_33307774:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v8i32_33307774:
-; AVX2OR512VL: # BB#0:
-; AVX2OR512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8i32_33307774:
+; ALL: # BB#0:
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
ret <8 x i32> %shuffle
}
define <8 x i32> @shuffle_v8i32_32107654(<8 x i32> %a, <8 x i32> %b) {
-; AVX1-LABEL: shuffle_v8i32_32107654:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v8i32_32107654:
-; AVX2OR512VL: # BB#0:
-; AVX2OR512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8i32_32107654:
+; ALL: # BB#0:
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
ret <8 x i32> %shuffle
}
define <8 x i32> @shuffle_v8i32_00234467(<8 x i32> %a, <8 x i32> %b) {
-; AVX1-LABEL: shuffle_v8i32_00234467:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v8i32_00234467:
-; AVX2OR512VL: # BB#0:
-; AVX2OR512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8i32_00234467:
+; ALL: # BB#0:
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
ret <8 x i32> %shuffle
}
@@ -1417,22 +1377,17 @@ define <8 x i32> @shuffle_v8i32_00224466
;
; AVX2OR512VL-LABEL: shuffle_v8i32_00224466:
; AVX2OR512VL: # BB#0:
-; AVX2OR512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
+; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
ret <8 x i32> %shuffle
}
define <8 x i32> @shuffle_v8i32_10325476(<8 x i32> %a, <8 x i32> %b) {
-; AVX1-LABEL: shuffle_v8i32_10325476:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v8i32_10325476:
-; AVX2OR512VL: # BB#0:
-; AVX2OR512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8i32_10325476:
+; ALL: # BB#0:
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
ret <8 x i32> %shuffle
}
@@ -1445,36 +1400,26 @@ define <8 x i32> @shuffle_v8i32_11335577
;
; AVX2OR512VL-LABEL: shuffle_v8i32_11335577:
; AVX2OR512VL: # BB#0:
-; AVX2OR512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
+; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
ret <8 x i32> %shuffle
}
define <8 x i32> @shuffle_v8i32_10235467(<8 x i32> %a, <8 x i32> %b) {
-; AVX1-LABEL: shuffle_v8i32_10235467:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v8i32_10235467:
-; AVX2OR512VL: # BB#0:
-; AVX2OR512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8i32_10235467:
+; ALL: # BB#0:
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
ret <8 x i32> %shuffle
}
define <8 x i32> @shuffle_v8i32_10225466(<8 x i32> %a, <8 x i32> %b) {
-; AVX1-LABEL: shuffle_v8i32_10225466:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v8i32_10225466:
-; AVX2OR512VL: # BB#0:
-; AVX2OR512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8i32_10225466:
+; ALL: # BB#0:
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
ret <8 x i32> %shuffle
}
@@ -1801,8 +1746,8 @@ define <8 x i32> @shuffle_v8i32_32103210
;
; AVX2-LABEL: shuffle_v8i32_32103210:
; AVX2: # BB#0:
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v8i32_32103210:
@@ -1815,17 +1760,11 @@ define <8 x i32> @shuffle_v8i32_32103210
}
define <8 x i32> @shuffle_v8i32_76547654(<8 x i32> %a, <8 x i32> %b) {
-; AVX1-LABEL: shuffle_v8i32_76547654:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v8i32_76547654:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
-; AVX2-NEXT: retq
+; AVX1OR2-LABEL: shuffle_v8i32_76547654:
+; AVX1OR2: # BB#0:
+; AVX1OR2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
+; AVX1OR2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v8i32_76547654:
; AVX512VL: # BB#0:
@@ -1837,17 +1776,11 @@ define <8 x i32> @shuffle_v8i32_76547654
}
define <8 x i32> @shuffle_v8i32_76543210(<8 x i32> %a, <8 x i32> %b) {
-; AVX1-LABEL: shuffle_v8i32_76543210:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v8i32_76543210:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX2-NEXT: retq
+; AVX1OR2-LABEL: shuffle_v8i32_76543210:
+; AVX1OR2: # BB#0:
+; AVX1OR2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX1OR2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v8i32_76543210:
; AVX512VL: # BB#0:
@@ -1859,17 +1792,11 @@ define <8 x i32> @shuffle_v8i32_76543210
}
define <8 x i32> @shuffle_v8i32_3210ba98(<8 x i32> %a, <8 x i32> %b) {
-; AVX1-LABEL: shuffle_v8i32_3210ba98:
-; AVX1: # BB#0:
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v8i32_3210ba98:
-; AVX2OR512VL: # BB#0:
-; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2OR512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8i32_3210ba98:
+; ALL: # BB#0:
+; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
ret <8 x i32> %shuffle
}
@@ -1883,41 +1810,29 @@ define <8 x i32> @shuffle_v8i32_3210fedc
;
; AVX2OR512VL-LABEL: shuffle_v8i32_3210fedc:
; AVX2OR512VL: # BB#0:
-; AVX2OR512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; AVX2OR512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; AVX2OR512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
ret <8 x i32> %shuffle
}
define <8 x i32> @shuffle_v8i32_7654fedc(<8 x i32> %a, <8 x i32> %b) {
-; AVX1-LABEL: shuffle_v8i32_7654fedc:
-; AVX1: # BB#0:
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v8i32_7654fedc:
-; AVX2OR512VL: # BB#0:
-; AVX2OR512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX2OR512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8i32_7654fedc:
+; ALL: # BB#0:
+; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
ret <8 x i32> %shuffle
}
define <8 x i32> @shuffle_v8i32_fedc7654(<8 x i32> %a, <8 x i32> %b) {
-; AVX1-LABEL: shuffle_v8i32_fedc7654:
-; AVX1: # BB#0:
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v8i32_fedc7654:
-; AVX2OR512VL: # BB#0:
-; AVX2OR512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
-; AVX2OR512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8i32_fedc7654:
+; ALL: # BB#0:
+; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
ret <8 x i32> %shuffle
}
@@ -1931,8 +1846,8 @@ define <8 x i32> @shuffle_v8i32_ba987654
;
; AVX2OR512VL-LABEL: shuffle_v8i32_ba987654:
; AVX2OR512VL: # BB#0:
-; AVX2OR512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; AVX2OR512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; AVX2OR512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
ret <8 x i32> %shuffle
@@ -1947,8 +1862,8 @@ define <8 x i32> @shuffle_v8i32_ba983210
;
; AVX2OR512VL-LABEL: shuffle_v8i32_ba983210:
; AVX2OR512VL: # BB#0:
-; AVX2OR512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; AVX2OR512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; AVX2OR512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
ret <8 x i32> %shuffle
@@ -2000,17 +1915,11 @@ define <8 x i32> @shuffle_v8i32_80u1b4uu
}
define <8 x i32> @shuffle_v8i32_uuuu1111(<8 x i32> %a, <8 x i32> %b) {
-; AVX1-LABEL: shuffle_v8i32_uuuu1111:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v8i32_uuuu1111:
-; AVX2OR512VL: # BB#0:
-; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; AVX2OR512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8i32_uuuu1111:
+; ALL: # BB#0:
+; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1>
ret <8 x i32> %shuffle
}
@@ -2018,7 +1927,7 @@ define <8 x i32> @shuffle_v8i32_uuuu1111
define <8 x i32> @shuffle_v8i32_2222uuuu(<8 x i32> %a, <8 x i32> %b) {
; ALL-LABEL: shuffle_v8i32_2222uuuu:
; ALL: # BB#0:
-; ALL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
+; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,2,2]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i32> %shuffle
@@ -2068,17 +1977,11 @@ define <8 x i32> @shuffle_v8i32_44444444
}
define <8 x i32> @shuffle_v8i32_5555uuuu(<8 x i32> %a, <8 x i32> %b) {
-; AVX1-LABEL: shuffle_v8i32_5555uuuu:
-; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v8i32_5555uuuu:
-; AVX2OR512VL: # BB#0:
-; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8i32_5555uuuu:
+; ALL: # BB#0:
+; ALL-NEXT: vextractf128 $1, %ymm0, %xmm0
+; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i32> %shuffle
}
@@ -2092,7 +1995,7 @@ define <8 x i32> @shuffle_v8i32_uuuuuu7u
;
; AVX2OR512VL-LABEL: shuffle_v8i32_uuuuuu7u:
; AVX2OR512VL: # BB#0:
-; AVX2OR512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,3,3,4,5,7,7]
+; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,3,3,4,5,7,7]
; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 undef>
ret <8 x i32> %shuffle
@@ -2221,29 +2124,19 @@ define <8 x i32> @shuffle_v8i32_389A7CDE
}
define <8 x i32> @shuffle_v8i32_30127456(<8 x i32> %a, <8 x i32> %b) {
-; AVX1-LABEL: shuffle_v8i32_30127456:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v8i32_30127456:
-; AVX2OR512VL: # BB#0:
-; AVX2OR512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8i32_30127456:
+; ALL: # BB#0:
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6>
ret <8 x i32> %shuffle
}
define <8 x i32> @shuffle_v8i32_12305674(<8 x i32> %a, <8 x i32> %b) {
-; AVX1-LABEL: shuffle_v8i32_12305674:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v8i32_12305674:
-; AVX2OR512VL: # BB#0:
-; AVX2OR512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8i32_12305674:
+; ALL: # BB#0:
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4>
ret <8 x i32> %shuffle
}
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll Sun Sep 17 20:29:47 2017
@@ -348,7 +348,7 @@ define <4 x float> @test_v16f32_0_1_3_6
define <16 x i32> @shuffle_v16i16_1_0_0_0_5_4_4_4_9_8_8_8_13_12_12_12(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i16_1_0_0_0_5_4_4_4_9_8_8_8_13_12_12_12:
; ALL: # BB#0:
-; ALL-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
+; ALL-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
; ALL-NEXT: retq
%c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
ret <16 x i32> %c
@@ -357,7 +357,7 @@ define <16 x i32> @shuffle_v16i16_1_0_0_
define <16 x i32> @shuffle_v16i16_3_3_0_0_7_7_4_4_11_11_8_8_15_15_12_12(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i16_3_3_0_0_7_7_4_4_11_11_8_8_15_15_12_12:
; ALL: # BB#0:
-; ALL-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13]
+; ALL-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13]
; ALL-NEXT: retq
%c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
ret <16 x i32> %c
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll Sun Sep 17 20:29:47 2017
@@ -1551,12 +1551,12 @@ define <8 x i64> @shuffle_v8i64_00224466
;
; AVX512F-LABEL: shuffle_v8i64_00224466:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13]
+; AVX512F-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13]
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_00224466:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13]
+; AVX512F-32-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13]
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
ret <8 x i64> %shuffle
@@ -1566,12 +1566,12 @@ define <8 x i64> @shuffle_v8i64_10325476
;
; AVX512F-LABEL: shuffle_v8i64_10325476:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13]
+; AVX512F-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13]
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_10325476:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13]
+; AVX512F-32-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13]
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
ret <8 x i64> %shuffle
@@ -1581,12 +1581,12 @@ define <8 x i64> @shuffle_v8i64_11335577
;
; AVX512F-LABEL: shuffle_v8i64_11335577:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
+; AVX512F-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_11335577:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
+; AVX512F-32-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
ret <8 x i64> %shuffle
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll Sun Sep 17 20:29:47 2017
@@ -237,7 +237,7 @@ define <16 x i8> @combine_pshufb_palignr
;
; AVX-LABEL: combine_pshufb_palignr:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-NEXT: retq
%1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
%2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll Sun Sep 17 20:29:47 2017
@@ -98,7 +98,7 @@ define <4 x i32> @combine_pshufd6(<4 x i
;
; AVX1-LABEL: combine_pshufd6:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_pshufd6:
@@ -175,8 +175,8 @@ define <4 x i32> @combine_bitwise_ops_te
;
; AVX-LABEL: combine_bitwise_ops_test1:
; AVX: # BB#0:
-; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
@@ -193,8 +193,8 @@ define <4 x i32> @combine_bitwise_ops_te
;
; AVX-LABEL: combine_bitwise_ops_test2:
; AVX: # BB#0:
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
@@ -211,8 +211,8 @@ define <4 x i32> @combine_bitwise_ops_te
;
; AVX-LABEL: combine_bitwise_ops_test3:
; AVX: # BB#0:
-; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
@@ -229,8 +229,8 @@ define <4 x i32> @combine_bitwise_ops_te
;
; AVX-LABEL: combine_bitwise_ops_test4:
; AVX: # BB#0:
-; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: retq
%shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 4, i32 6, i32 5, i32 7>
%shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 4, i32 6, i32 5, i32 7>
@@ -247,8 +247,8 @@ define <4 x i32> @combine_bitwise_ops_te
;
; AVX-LABEL: combine_bitwise_ops_test5:
; AVX: # BB#0:
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: retq
%shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 4, i32 6, i32 5, i32 7>
%shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 4, i32 6, i32 5, i32 7>
@@ -265,8 +265,8 @@ define <4 x i32> @combine_bitwise_ops_te
;
; AVX-LABEL: combine_bitwise_ops_test6:
; AVX: # BB#0:
-; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: retq
%shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 4, i32 6, i32 5, i32 7>
%shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 4, i32 6, i32 5, i32 7>
@@ -664,7 +664,7 @@ define <4 x i32> @combine_nested_undef_t
;
; AVX-LABEL: combine_nested_undef_test1:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,0,1]
; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 3, i32 1>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
@@ -679,7 +679,7 @@ define <4 x i32> @combine_nested_undef_t
;
; AVX-LABEL: combine_nested_undef_test2:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,0,3]
; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
@@ -694,7 +694,7 @@ define <4 x i32> @combine_nested_undef_t
;
; AVX-LABEL: combine_nested_undef_test3:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,0,3]
; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
@@ -709,7 +709,7 @@ define <4 x i32> @combine_nested_undef_t
;
; AVX1-LABEL: combine_nested_undef_test4:
; AVX1: # BB#0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_nested_undef_test4:
@@ -729,7 +729,7 @@ define <4 x i32> @combine_nested_undef_t
;
; AVX-LABEL: combine_nested_undef_test5:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 5, i32 5, i32 2, i32 3>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 4, i32 3>
@@ -744,7 +744,7 @@ define <4 x i32> @combine_nested_undef_t
;
; AVX-LABEL: combine_nested_undef_test6:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 6, i32 2, i32 4>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 4>
@@ -759,7 +759,7 @@ define <4 x i32> @combine_nested_undef_t
;
; AVX-LABEL: combine_nested_undef_test7:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,2]
; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 0, i32 2>
@@ -774,7 +774,7 @@ define <4 x i32> @combine_nested_undef_t
;
; AVX-LABEL: combine_nested_undef_test8:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 4, i32 3, i32 4>
@@ -789,7 +789,7 @@ define <4 x i32> @combine_nested_undef_t
;
; AVX-LABEL: combine_nested_undef_test9:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,2]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,3,2,2]
; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 3, i32 2, i32 5>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
@@ -804,7 +804,7 @@ define <4 x i32> @combine_nested_undef_t
;
; AVX-LABEL: combine_nested_undef_test10:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,3]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,3]
; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 1, i32 5, i32 5>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 4>
@@ -819,7 +819,7 @@ define <4 x i32> @combine_nested_undef_t
;
; AVX-LABEL: combine_nested_undef_test11:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,1]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,2,1]
; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 2, i32 5, i32 4>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 0>
@@ -834,7 +834,7 @@ define <4 x i32> @combine_nested_undef_t
;
; AVX1-LABEL: combine_nested_undef_test12:
; AVX1: # BB#0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_nested_undef_test12:
@@ -910,9 +910,9 @@ define <4 x i32> @combine_nested_undef_t
;
; AVX2-LABEL: combine_nested_undef_test15:
; AVX2: # BB#0:
-; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
-; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
+; AVX2-NEXT: vbroadcastss %xmm1, %xmm1
+; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,0,1]
+; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; AVX2-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 3, i32 1>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
@@ -948,8 +948,8 @@ define <4 x i32> @combine_nested_undef_t
;
; AVX2-LABEL: combine_nested_undef_test16:
; AVX2: # BB#0:
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX2-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
@@ -983,8 +983,8 @@ define <4 x i32> @combine_nested_undef_t
;
; AVX2-LABEL: combine_nested_undef_test17:
; AVX2: # BB#0:
-; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
+; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,0,1]
; AVX2-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 3, i32 1>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
@@ -999,7 +999,7 @@ define <4 x i32> @combine_nested_undef_t
;
; AVX-LABEL: combine_nested_undef_test18:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[1,1,0,3]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[1,1,0,3]
; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 3>
@@ -1033,8 +1033,8 @@ define <4 x i32> @combine_nested_undef_t
;
; AVX2-LABEL: combine_nested_undef_test19:
; AVX2: # BB#0:
-; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
+; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
+; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,0,0,0]
; AVX2-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 5, i32 6>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
@@ -1070,8 +1070,8 @@ define <4 x i32> @combine_nested_undef_t
;
; AVX2-LABEL: combine_nested_undef_test20:
; AVX2: # BB#0:
-; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,3,0]
+; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,3,0]
; AVX2-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 3, i32 2, i32 4, i32 4>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
@@ -1125,7 +1125,7 @@ define <4 x i32> @combine_nested_undef_t
;
; AVX-LABEL: combine_nested_undef_test22:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[1,1,1,3]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[1,1,1,3]
; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 3>
@@ -1140,7 +1140,7 @@ define <4 x i32> @combine_nested_undef_t
;
; AVX-LABEL: combine_nested_undef_test23:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,3]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[0,1,0,3]
; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 3>
@@ -1155,7 +1155,7 @@ define <4 x i32> @combine_nested_undef_t
;
; AVX-LABEL: combine_nested_undef_test24:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,3,2,3]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[0,3,2,3]
; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 2, i32 4>
@@ -1170,7 +1170,7 @@ define <4 x i32> @combine_nested_undef_t
;
; AVX1-LABEL: combine_nested_undef_test25:
; AVX1: # BB#0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_nested_undef_test25:
@@ -1190,7 +1190,7 @@ define <4 x i32> @combine_nested_undef_t
;
; AVX-LABEL: combine_nested_undef_test26:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 1, i32 2, i32 6, i32 7>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
@@ -1205,7 +1205,7 @@ define <4 x i32> @combine_nested_undef_t
;
; AVX1-LABEL: combine_nested_undef_test27:
; AVX1: # BB#0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_nested_undef_test27:
@@ -1225,7 +1225,7 @@ define <4 x i32> @combine_nested_undef_t
;
; AVX-LABEL: combine_nested_undef_test28:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,0]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,0]
; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 1, i32 2, i32 4, i32 5>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 3, i32 2>
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll Sun Sep 17 20:29:47 2017
@@ -5,7 +5,7 @@
define <2 x i1> @shuf2i1_1_0(<2 x i1> %a) {
; AVX512F-LABEL: shuf2i1_1_0:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX512F-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf2i1_1_0:
@@ -49,7 +49,7 @@ define <2 x i1> @shuf2i1_1_2(<2 x i1> %a
define <4 x i1> @shuf4i1_3_2_10(<4 x i1> %a) {
; AVX512F-LABEL: shuf4i1_3_2_10:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX512F-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf4i1_3_2_10:
Modified: llvm/trunk/test/CodeGen/X86/widened-broadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widened-broadcast.ll?rev=313507&r1=313506&r2=313507&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widened-broadcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widened-broadcast.ll Sun Sep 17 20:29:47 2017
@@ -96,7 +96,7 @@ define <4 x i32> @load_splat_4i32_4i32_0
;
; AVX1-LABEL: load_splat_4i32_4i32_0101:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,1,0,1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_splat_4i32_4i32_0101:
@@ -123,7 +123,7 @@ define <8 x i32> @load_splat_8i32_4i32_0
;
; AVX1-LABEL: load_splat_8i32_4i32_01010101:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,1,0,1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -178,7 +178,7 @@ define <8 x i16> @load_splat_8i16_8i16_0
;
; AVX1-LABEL: load_splat_8i16_8i16_01010101:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,0,0,0]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,0,0,0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_splat_8i16_8i16_01010101:
@@ -204,7 +204,7 @@ define <8 x i16> @load_splat_8i16_8i16_0
;
; AVX1-LABEL: load_splat_8i16_8i16_01230123:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,1,0,1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_splat_8i16_8i16_01230123:
@@ -231,7 +231,7 @@ define <16 x i16> @load_splat_16i16_8i16
;
; AVX1-LABEL: load_splat_16i16_8i16_0101010101010101:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,0,0,0]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -259,7 +259,7 @@ define <16 x i16> @load_splat_16i16_8i16
;
; AVX1-LABEL: load_splat_16i16_8i16_0123012301230123:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,1,0,1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -359,7 +359,7 @@ define <16 x i8> @load_splat_16i8_16i8_0
;
; AVX1-LABEL: load_splat_16i8_16i8_0123012301230123:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,0,0,0]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,0,0,0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_splat_16i8_16i8_0123012301230123:
@@ -385,7 +385,7 @@ define <16 x i8> @load_splat_16i8_16i8_0
;
; AVX1-LABEL: load_splat_16i8_16i8_0123456701234567:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,1,0,1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_splat_16i8_16i8_0123456701234567:
@@ -442,7 +442,7 @@ define <32 x i8> @load_splat_32i8_16i8_0
;
; AVX1-LABEL: load_splat_32i8_16i8_01230123012301230123012301230123:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,0,0,0]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -470,7 +470,7 @@ define <32 x i8> @load_splat_32i8_16i8_0
;
; AVX1-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,1,0,1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
More information about the llvm-commits
mailing list