[llvm] r290869 - [AVX-512] Teach EVEX to VEX conversion pass to handle VINSERT and VEXTRACT instructions.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 2 21:46:18 PST 2017
Author: ctopper
Date: Mon Jan 2 23:46:18 2017
New Revision: 290869
URL: http://llvm.org/viewvc/llvm-project?rev=290869&view=rev
Log:
[AVX-512] Teach EVEX to VEX conversion pass to handle VINSERT and VEXTRACT instructions.
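The new table entries below map the unmasked 256-bit EVEX forms (e.g. VEXTRACTF32x4Z256rr, VINSERTI64x2Z256rm) onto their shorter VEX-encoded equivalents (VEXTRACTF128rr, VINSERTI128rm), which is why the updated tests now check for vextractf128/vinserti128 and carry "EVEX TO VEX Compression" encoding comments. As a rough illustration of how such a compression table can be consumed, here is a minimal, self-contained C++ sketch. Only the {EVEX opcode, VEX opcode} pair layout is taken from the X86EvexToVexCompressTableEntry additions in the diff; the opcode constants and the lookup helper are hypothetical stand-ins, not the actual pass implementation.

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    // Hypothetical stand-ins for the real X86 opcode enum values.
    enum Opcode : uint16_t {
      VEXTRACTF32x4Z256rr = 100, // EVEX-encoded, 256-bit source
      VEXTRACTF64x2Z256rr = 101,
      VINSERTI32x4Z256rr  = 102,
      VEXTRACTF128rr      = 200, // VEX equivalents
      VINSERTI128rr       = 201,
    };

    // Same shape as the X86EvexToVexCompressTableEntry pairs in the
    // diff: { EVEX opcode, VEX opcode }.
    struct CompressTableEntry {
      uint16_t EvexOpc;
      uint16_t VexOpc;
    };

    // The table must stay sorted by EVEX opcode so it can be
    // binary-searched.
    static const CompressTableEntry Table[] = {
        {VEXTRACTF32x4Z256rr, VEXTRACTF128rr},
        {VEXTRACTF64x2Z256rr, VEXTRACTF128rr},
        {VINSERTI32x4Z256rr,  VINSERTI128rr},
    };

    // Return the VEX replacement for an EVEX opcode, or 0 if none.
    static uint16_t compressOpcode(uint16_t EvexOpc) {
      auto *I = std::lower_bound(
          std::begin(Table), std::end(Table), EvexOpc,
          [](const CompressTableEntry &E, uint16_t Opc) {
            return E.EvexOpc < Opc;
          });
      if (I != std::end(Table) && I->EvexOpc == EvexOpc)
        return I->VexOpc;
      return 0;
    }

    int main() {
      // An unmasked vextractf32x4 $1, %ymm0, %xmm2 can be re-encoded
      // as the shorter vextractf128.
      std::printf("%u -> %u\n", (unsigned)VEXTRACTF32x4Z256rr,
                  (unsigned)compressOpcode(VEXTRACTF32x4Z256rr));
    }

Note that the masked EVEX forms (the {%k1} variants in the tests below) have no VEX counterpart, so only the unmasked Z256 instructions get table entries and everything else is left unchanged.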
Modified:
llvm/trunk/lib/Target/X86/X86InstrTablesInfo.h
llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll
llvm/trunk/test/CodeGen/X86/avx512-extract-subvector.ll
llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
llvm/trunk/test/CodeGen/X86/avx512-vbroadcasti128.ll
llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/masked_memop.ll
llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll
llvm/trunk/test/CodeGen/X86/subvector-broadcast.ll
llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll
llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll
llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll
llvm/trunk/test/CodeGen/X86/vector-lzcnt-256.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll
llvm/trunk/test/CodeGen/X86/vector-trunc.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrTablesInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrTablesInfo.h?rev=290869&r1=290868&r2=290869&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrTablesInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86InstrTablesInfo.h Mon Jan 2 23:46:18 2017
@@ -755,6 +755,14 @@ static const X86EvexToVexCompressTableEn
{ X86::VDIVPDZ256rr , X86::VDIVPDYrr },
{ X86::VDIVPSZ256rm , X86::VDIVPSYrm },
{ X86::VDIVPSZ256rr , X86::VDIVPSYrr },
+ { X86::VEXTRACTF32x4Z256mr , X86::VEXTRACTF128mr },
+ { X86::VEXTRACTF64x2Z256mr , X86::VEXTRACTF128mr },
+ { X86::VEXTRACTF32x4Z256rr , X86::VEXTRACTF128rr },
+ { X86::VEXTRACTF64x2Z256rr , X86::VEXTRACTF128rr },
+ { X86::VEXTRACTI32x4Z256mr , X86::VEXTRACTI128mr },
+ { X86::VEXTRACTI64x2Z256mr , X86::VEXTRACTI128mr },
+ { X86::VEXTRACTI32x4Z256rr , X86::VEXTRACTI128rr },
+ { X86::VEXTRACTI64x2Z256rr , X86::VEXTRACTI128rr },
{ X86::VFMADD132PDZ256m , X86::VFMADD132PDYm },
{ X86::VFMADD132PDZ256r , X86::VFMADD132PDYr },
{ X86::VFMADD132PSZ256m , X86::VFMADD132PSYm },
@@ -827,6 +835,14 @@ static const X86EvexToVexCompressTableEn
{ X86::VFNMSUB231PDZ256r , X86::VFNMSUB231PDYr },
{ X86::VFNMSUB231PSZ256m , X86::VFNMSUB231PSYm },
{ X86::VFNMSUB231PSZ256r , X86::VFNMSUB231PSYr },
+ { X86::VINSERTF32x4Z256rm , X86::VINSERTF128rm },
+ { X86::VINSERTF64x2Z256rm , X86::VINSERTF128rm },
+ { X86::VINSERTF32x4Z256rr , X86::VINSERTF128rr },
+ { X86::VINSERTF64x2Z256rr , X86::VINSERTF128rr },
+ { X86::VINSERTI32x4Z256rm , X86::VINSERTI128rm },
+ { X86::VINSERTI64x2Z256rm , X86::VINSERTI128rm },
+ { X86::VINSERTI32x4Z256rr , X86::VINSERTI128rr },
+ { X86::VINSERTI64x2Z256rr , X86::VINSERTI128rr },
{ X86::VMAXCPDZ256rm , X86::VMAXCPDYrm },
{ X86::VMAXCPDZ256rr , X86::VMAXCPDYrr },
{ X86::VMAXCPSZ256rm , X86::VMAXCPSYrm },
Modified: llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll?rev=290869&r1=290868&r2=290869&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll Mon Jan 2 23:46:18 2017
@@ -209,34 +209,22 @@ entry:
}
define <4 x i64> @QQ64(i64* %ptr) nounwind uwtable readnone ssp {
-; X32-AVX2-LABEL: QQ64:
-; X32-AVX2: ## BB#0: ## %entry
-; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX2-NEXT: movl (%eax), %ecx
-; X32-AVX2-NEXT: movl 4(%eax), %eax
-; X32-AVX2-NEXT: vmovd %ecx, %xmm0
-; X32-AVX2-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
-; X32-AVX2-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
-; X32-AVX2-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
-; X32-AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-AVX2-NEXT: retl
+; X32-LABEL: QQ64:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl (%eax), %ecx
+; X32-NEXT: movl 4(%eax), %eax
+; X32-NEXT: vmovd %ecx, %xmm0
+; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
+; X32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: retl
;
; X64-LABEL: QQ64:
; X64: ## BB#0: ## %entry
; X64-NEXT: vbroadcastsd (%rdi), %ymm0
; X64-NEXT: retq
-;
-; X32-AVX512VL-LABEL: QQ64:
-; X32-AVX512VL: ## BB#0: ## %entry
-; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512VL-NEXT: movl (%eax), %ecx
-; X32-AVX512VL-NEXT: movl 4(%eax), %eax
-; X32-AVX512VL-NEXT: vmovd %ecx, %xmm0
-; X32-AVX512VL-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
-; X32-AVX512VL-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
-; X32-AVX512VL-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
-; X32-AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
-; X32-AVX512VL-NEXT: retl
entry:
%q = load i64, i64* %ptr, align 4
%q0 = insertelement <4 x i64> undef, i64 %q, i32 0
@@ -1683,7 +1671,7 @@ define void @isel_crash_4q(i64* %cV_R.ad
; X32-AVX512VL-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
; X32-AVX512VL-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
; X32-AVX512VL-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
-; X32-AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm1, %ymm1
+; X32-AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm1
; X32-AVX512VL-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
; X32-AVX512VL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp)
; X32-AVX512VL-NEXT: movl %ebp, %esp
Modified: llvm/trunk/test/CodeGen/X86/avx512-extract-subvector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-extract-subvector.ll?rev=290869&r1=290868&r2=290869&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-extract-subvector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-extract-subvector.ll Mon Jan 2 23:46:18 2017
@@ -60,7 +60,7 @@ define <32 x i8> @extract_subvector256_v
define void @extract_subvector256_v8f64_store(double* nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v8f64_store:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vextractf64x2 $1, %ymm0, (%rdi)
+; SKX-NEXT: vextractf128 $1, %ymm0, (%rdi)
; SKX-NEXT: retq
entry:
%0 = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 2, i32 3>
@@ -72,7 +72,7 @@ entry:
define void @extract_subvector256_v8f32_store(float* nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v8f32_store:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vextractf32x4 $1, %ymm0, (%rdi)
+; SKX-NEXT: vextractf128 $1, %ymm0, (%rdi)
; SKX-NEXT: retq
entry:
%0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -84,7 +84,7 @@ entry:
define void @extract_subvector256_v4i64_store(i64* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v4i64_store:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vextracti64x2 $1, %ymm0, (%rdi)
+; SKX-NEXT: vextracti128 $1, %ymm0, (%rdi)
; SKX-NEXT: retq
entry:
%0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
@@ -96,7 +96,7 @@ entry:
define void @extract_subvector256_v8i32_store(i32* nocapture %addr, <8 x i32> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v8i32_store:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vextracti32x4 $1, %ymm0, (%rdi)
+; SKX-NEXT: vextracti128 $1, %ymm0, (%rdi)
; SKX-NEXT: retq
entry:
%0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -108,7 +108,7 @@ entry:
define void @extract_subvector256_v16i16_store(i16* nocapture %addr, <16 x i16> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v16i16_store:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vextracti32x4 $1, %ymm0, (%rdi)
+; SKX-NEXT: vextracti128 $1, %ymm0, (%rdi)
; SKX-NEXT: retq
entry:
%0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -120,7 +120,7 @@ entry:
define void @extract_subvector256_v32i8_store(i8* nocapture %addr, <32 x i8> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v32i8_store:
; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vextracti32x4 $1, %ymm0, (%rdi)
+; SKX-NEXT: vextracti128 $1, %ymm0, (%rdi)
; SKX-NEXT: retq
entry:
%0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
Modified: llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll?rev=290869&r1=290868&r2=290869&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll Mon Jan 2 23:46:18 2017
@@ -463,7 +463,7 @@ define i64 @extract_v4i64(<4 x i64> %x,
; SKX-LABEL: extract_v4i64:
; SKX: ## BB#0:
; SKX-NEXT: vpextrq $1, %xmm0, %rax
-; SKX-NEXT: vextracti64x2 $1, %ymm0, %xmm0
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
; SKX-NEXT: vpextrq $1, %xmm0, (%rdi)
; SKX-NEXT: retq
%r1 = extractelement <4 x i64> %x, i32 1
@@ -521,7 +521,7 @@ define i32 @extract_v8i32(<8 x i32> %x,
; SKX-LABEL: extract_v8i32:
; SKX: ## BB#0:
; SKX-NEXT: vpextrd $1, %xmm0, %eax
-; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm0
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
; SKX-NEXT: vpextrd $1, %xmm0, (%rdi)
; SKX-NEXT: retq
%r1 = extractelement <8 x i32> %x, i32 1
@@ -582,7 +582,7 @@ define i16 @extract_v16i16(<16 x i16> %x
; SKX-LABEL: extract_v16i16:
; SKX: ## BB#0:
; SKX-NEXT: vpextrw $1, %xmm0, %eax
-; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm0
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
; SKX-NEXT: vpextrw $1, %xmm0, (%rdi)
; SKX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT: retq
@@ -646,7 +646,7 @@ define i8 @extract_v32i8(<32 x i8> %x, i
; SKX-LABEL: extract_v32i8:
; SKX: ## BB#0:
; SKX-NEXT: vpextrb $1, %xmm0, %eax
-; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm0
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
; SKX-NEXT: vpextrb $1, %xmm0, (%rdi)
; SKX-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT: retq
@@ -714,9 +714,9 @@ define <4 x i64> @insert_v4i64(<4 x i64>
; SKX: ## BB#0:
; SKX-NEXT: vpinsrq $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; SKX-NEXT: vextracti64x2 $1, %ymm0, %xmm1
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm1
; SKX-NEXT: vpinsrq $1, %rdi, %xmm1, %xmm1
-; SKX-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm0
+; SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT: retq
%val = load i64, i64* %ptr
%r1 = insertelement <4 x i64> %x, i64 %val, i32 1
@@ -780,9 +780,9 @@ define <8 x i32> @insert_v8i32(<8 x i32>
; SKX: ## BB#0:
; SKX-NEXT: vpinsrd $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm1
; SKX-NEXT: vpinsrd $1, %edi, %xmm1, %xmm1
-; SKX-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT: retq
%val = load i32, i32* %ptr
%r1 = insertelement <8 x i32> %x, i32 %val, i32 1
@@ -846,9 +846,9 @@ define <16 x i16> @insert_v16i16(<16 x i
; SKX: ## BB#0:
; SKX-NEXT: vpinsrw $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm1
; SKX-NEXT: vpinsrw $1, %edi, %xmm1, %xmm1
-; SKX-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT: retq
%val = load i16, i16* %ptr
%r1 = insertelement <16 x i16> %x, i16 %val, i32 1
@@ -912,9 +912,9 @@ define <32 x i8> @insert_v32i8(<32 x i8>
; SKX: ## BB#0:
; SKX-NEXT: vpinsrb $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm1
; SKX-NEXT: vpinsrb $1, %edi, %xmm1, %xmm1
-; SKX-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT: retq
%val = load i8, i8* %ptr
%r1 = insertelement <32 x i8> %x, i8 %val, i32 1
@@ -1014,9 +1014,9 @@ define <16 x i16> @test_insert_128_v16i1
;
; SKX-LABEL: test_insert_128_v16i16:
; SKX: ## BB#0:
-; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm1
; SKX-NEXT: vpinsrw $2, %edi, %xmm1, %xmm1
-; SKX-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT: retq
%r = insertelement <16 x i16> %x, i16 %y, i32 10
ret <16 x i16> %r
@@ -1032,9 +1032,9 @@ define <32 x i8> @test_insert_128_v32i8(
;
; SKX-LABEL: test_insert_128_v32i8:
; SKX: ## BB#0:
-; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm1
; SKX-NEXT: vpinsrb $4, %edi, %xmm1, %xmm1
-; SKX-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT: retq
%r = insertelement <32 x i8> %x, i8 %y, i32 20
ret <32 x i8> %r
Modified: llvm/trunk/test/CodeGen/X86/avx512-vbroadcasti128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-vbroadcasti128.ll?rev=290869&r1=290868&r2=290869&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-vbroadcasti128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-vbroadcasti128.ll Mon Jan 2 23:46:18 2017
@@ -237,7 +237,7 @@ define <8 x i32> @PR29088(<4 x i32>* %p0
; X64-AVX512VL-NEXT: vmovdqa (%rdi), %xmm0
; X64-AVX512VL-NEXT: vpxor %ymm1, %ymm1, %ymm1
; X64-AVX512VL-NEXT: vmovdqa %ymm1, (%rsi)
-; X64-AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512BWVL-LABEL: PR29088:
@@ -245,7 +245,7 @@ define <8 x i32> @PR29088(<4 x i32>* %p0
; X64-AVX512BWVL-NEXT: vmovdqa (%rdi), %xmm0
; X64-AVX512BWVL-NEXT: vpxor %ymm1, %ymm1, %ymm1
; X64-AVX512BWVL-NEXT: vmovdqa %ymm1, (%rsi)
-; X64-AVX512BWVL-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512BWVL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512BWVL-NEXT: retq
;
; X64-AVX512DQVL-LABEL: PR29088:
@@ -253,7 +253,7 @@ define <8 x i32> @PR29088(<4 x i32>* %p0
; X64-AVX512DQVL-NEXT: vmovdqa (%rdi), %xmm0
; X64-AVX512DQVL-NEXT: vxorps %ymm1, %ymm1, %ymm1
; X64-AVX512DQVL-NEXT: vmovaps %ymm1, (%rsi)
-; X64-AVX512DQVL-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512DQVL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512DQVL-NEXT: retq
%ld = load <4 x i32>, <4 x i32>* %p0
store <8 x float> zeroinitializer, <8 x float>* %p1
Modified: llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll?rev=290869&r1=290868&r2=290869&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll Mon Jan 2 23:46:18 2017
@@ -30,7 +30,7 @@ define <8 x i32> @test_cmp_b_256(<32 x i
; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01]
; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02]
; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03]
-; CHECK-NEXT: vinserti32x4 $1, %xmm0, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0x75,0x28,0x38,0xc0,0x01]
+; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
%vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
@@ -79,7 +79,7 @@ define <8 x i32> @test_mask_cmp_b_256(<3
; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01]
; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02]
; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03]
-; CHECK-NEXT: vinserti32x4 $1, %xmm0, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0x75,0x28,0x38,0xc0,0x01]
+; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
%vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
@@ -129,7 +129,7 @@ define <8 x i32> @test_ucmp_b_256(<32 x
; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01]
; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02]
; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03]
-; CHECK-NEXT: vinserti32x4 $1, %xmm0, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0x75,0x28,0x38,0xc0,0x01]
+; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
%vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
@@ -178,7 +178,7 @@ define <8 x i32> @test_mask_ucmp_b_256(<
; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01]
; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02]
; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03]
-; CHECK-NEXT: vinserti32x4 $1, %xmm0, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0x75,0x28,0x38,0xc0,0x01]
+; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
%vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
Modified: llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll?rev=290869&r1=290868&r2=290869&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll Mon Jan 2 23:46:18 2017
@@ -1565,7 +1565,7 @@ declare <2 x double> @llvm.x86.avx512.ma
define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_256(<4 x double> %x0, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vextractf64x2_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm2 ## encoding: [0x62,0xf3,0xfd,0x28,0x19,0xc2,0x01]
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc2,0x01]
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x19,0xc1,0x01]
; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x19,0xc0,0x01]
@@ -1585,7 +1585,7 @@ declare <4 x double> @llvm.x86.avx512.ma
define <4 x double>@test_int_x86_avx512_mask_insertf64x2_256(<4 x double> %x0, <2 x double> %x1, <4 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x2_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm3 ## encoding: [0x62,0xf3,0xfd,0x28,0x18,0xd9,0x01]
+; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd9,0x01]
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x18,0xd1,0x01]
; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x18,0xc1,0x01]
@@ -1605,7 +1605,7 @@ declare <4 x i64> @llvm.x86.avx512.mask.
define <4 x i64>@test_int_x86_avx512_mask_inserti64x2_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x2_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm3 ## encoding: [0x62,0xf3,0xfd,0x28,0x38,0xd9,0x01]
+; CHECK-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd9,0x01]
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x38,0xd1,0x01]
; CHECK-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x38,0xc1,0x01]
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll?rev=290869&r1=290868&r2=290869&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll Mon Jan 2 23:46:18 2017
@@ -4778,7 +4778,7 @@ declare <4 x float> @llvm.x86.avx512.mas
define <4 x float>@test_int_x86_avx512_mask_vextractf32x4_256(<8 x float> %x0, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vextractf32x4_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm2 ## encoding: [0x62,0xf3,0x7d,0x28,0x19,0xc2,0x01]
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc2,0x01]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x19,0xc1,0x01]
; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x19,0xc0,0x01]
@@ -4798,7 +4798,7 @@ declare <8 x float> @llvm.x86.avx512.mas
define <8 x float>@test_int_x86_avx512_mask_insertf32x4_256(<8 x float> %x0, <4 x float> %x1, <8 x float> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x4_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm3 ## encoding: [0x62,0xf3,0x7d,0x28,0x18,0xd9,0x01]
+; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd9,0x01]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x18,0xd1,0x01]
; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xc1,0x01]
@@ -4818,7 +4818,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.
define <8 x i32>@test_int_x86_avx512_mask_inserti32x4_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x4_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm3 ## encoding: [0x62,0xf3,0x7d,0x28,0x38,0xd9,0x01]
+; CHECK-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd9,0x01]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x38,0xd1,0x01]
; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc1,0x01]
Modified: llvm/trunk/test/CodeGen/X86/masked_memop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_memop.ll?rev=290869&r1=290868&r2=290869&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_memop.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_memop.ll Mon Jan 2 23:46:18 2017
@@ -1009,7 +1009,7 @@ define void @one_mask_bit_set3(<4 x i64>
;
; SKX-LABEL: one_mask_bit_set3:
; SKX: ## BB#0:
-; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm0
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
; SKX-NEXT: vmovq %xmm0, 16(%rdi)
; SKX-NEXT: retq
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %val, <4 x i64>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 true, i1 false>)
@@ -1026,17 +1026,11 @@ define void @one_mask_bit_set4(<4 x doub
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
-; AVX512F-LABEL: one_mask_bit_set4:
-; AVX512F: ## BB#0:
-; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX512F-NEXT: vmovhpd %xmm0, 24(%rdi)
-; AVX512F-NEXT: retq
-;
-; SKX-LABEL: one_mask_bit_set4:
-; SKX: ## BB#0:
-; SKX-NEXT: vextractf32x4 $1, %ymm0, %xmm0
-; SKX-NEXT: vmovhpd %xmm0, 24(%rdi)
-; SKX-NEXT: retq
+; AVX512-LABEL: one_mask_bit_set4:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX512-NEXT: vmovhpd %xmm0, 24(%rdi)
+; AVX512-NEXT: retq
call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> %val, <4 x double>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 false, i1 true>)
ret void
}
@@ -1109,19 +1103,12 @@ define <4 x i64> @load_one_mask_bit_set3
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: load_one_mask_bit_set3:
-; AVX512F: ## BB#0:
-; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512F-NEXT: vpinsrq $0, 16(%rdi), %xmm1, %xmm1
-; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT: retq
-;
-; SKX-LABEL: load_one_mask_bit_set3:
-; SKX: ## BB#0:
-; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; SKX-NEXT: vpinsrq $0, 16(%rdi), %xmm1, %xmm1
-; SKX-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; SKX-NEXT: retq
+; AVX512-LABEL: load_one_mask_bit_set3:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512-NEXT: vpinsrq $0, 16(%rdi), %xmm1, %xmm1
+; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
%res = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 true, i1 false>, <4 x i64> %val)
ret <4 x i64> %res
}
@@ -1136,19 +1123,12 @@ define <4 x double> @load_one_mask_bit_s
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retq
;
-; AVX512F-LABEL: load_one_mask_bit_set4:
-; AVX512F: ## BB#0:
-; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512F-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
-; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT: retq
-;
-; SKX-LABEL: load_one_mask_bit_set4:
-; SKX: ## BB#0:
-; SKX-NEXT: vextractf32x4 $1, %ymm0, %xmm1
-; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
-; SKX-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
-; SKX-NEXT: retq
+; AVX512-LABEL: load_one_mask_bit_set4:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX512-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
+; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
%res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 false, i1 true>, <4 x double> %val)
ret <4 x double> %res
}
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512vl.ll?rev=290869&r1=290868&r2=290869&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512vl.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512vl.ll Mon Jan 2 23:46:18 2017
@@ -488,7 +488,7 @@ define <8 x float> @stack_fold_xorps_ymm
define <4 x float> @stack_fold_extractf32x4(<8 x float> %a0, <8 x float> %a1) {
;CHECK-LABEL: stack_fold_extractf32x4
- ;CHECK: vextractf32x4 $1, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
+ ;CHECK: vextractf128 $1, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
%1 = shufflevector <8 x float> %a0, <8 x float> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
ret <4 x float> %1
@@ -496,7 +496,7 @@ define <4 x float> @stack_fold_extractf3
define <2 x double> @stack_fold_extractf64x2(<4 x double> %a0, <4 x double> %a1) {
;CHECK-LABEL: stack_fold_extractf64x2
- ;CHECK: vextractf64x2 $1, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
+ ;CHECK: vextractf128 $1, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
%1 = shufflevector <4 x double> %a0, <4 x double> %a1, <2 x i32> <i32 2, i32 3>
%2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
ret <2 x double> %1
@@ -504,7 +504,7 @@ define <2 x double> @stack_fold_extractf
define <8 x float> @stack_fold_insertf32x4(<4 x float> %a0, <4 x float> %a1) {
;CHECK-LABEL: stack_fold_insertf32x4
- ;CHECK: vinsertf32x4 $1, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ ;CHECK: vinsertf128 $1, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = shufflevector <4 x float> %a0, <4 x float> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x float> %2
@@ -512,7 +512,7 @@ define <8 x float> @stack_fold_insertf32
define <4 x double> @stack_fold_insertf64x2(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_insertf64x2
- ;CHECK: vinsertf64x2 $1, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ ;CHECK: vinsertf128 $1, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = shufflevector <2 x double> %a0, <2 x double> %a1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x double> %2
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll?rev=290869&r1=290868&r2=290869&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll Mon Jan 2 23:46:18 2017
@@ -445,7 +445,7 @@ declare <16 x i8> @llvm.x86.avx512.mask.
define <4 x i32> @stack_fold_extracti32x4(<8 x i32> %a0, <8 x i32> %a1) {
;CHECK-LABEL: stack_fold_extracti32x4
- ;CHECK: vextracti32x4 $1, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
+ ;CHECK: vextracti128 $1, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
; add forces execution domain
%1 = add <8 x i32> %a0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%2 = shufflevector <8 x i32> %1, <8 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -455,7 +455,7 @@ define <4 x i32> @stack_fold_extracti32x
define <2 x i64> @stack_fold_extracti64x2(<4 x i64> %a0, <4 x i64> %a1) {
;CHECK-LABEL: stack_fold_extracti64x2
- ;CHECK: vextracti64x2 $1, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
+ ;CHECK: vextracti128 $1, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
; add forces execution domain
%1 = add <4 x i64> %a0, <i64 1, i64 1, i64 1, i64 1>
%2 = shufflevector <4 x i64> %1, <4 x i64> %a1, <2 x i32> <i32 2, i32 3>
@@ -465,7 +465,7 @@ define <2 x i64> @stack_fold_extracti64x
define <8 x i32> @stack_fold_inserti32x4(<4 x i32> %a0, <4 x i32> %a1) {
;CHECK-LABEL: stack_fold_inserti32x4
- ;CHECK: vinserti32x4 $1, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ ;CHECK: vinserti128 $1, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; add forces execution domain
@@ -475,7 +475,7 @@ define <8 x i32> @stack_fold_inserti32x4
define <4 x i64> @stack_fold_inserti64x2(<2 x i64> %a0, <2 x i64> %a1) {
;CHECK-LABEL: stack_fold_inserti64x2
- ;CHECK: vinserti64x2 $1, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ ;CHECK: vinserti128 $1, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = shufflevector <2 x i64> %a0, <2 x i64> %a1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; add forces execution domain
Modified: llvm/trunk/test/CodeGen/X86/subvector-broadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/subvector-broadcast.ll?rev=290869&r1=290868&r2=290869&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/subvector-broadcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/subvector-broadcast.ll Mon Jan 2 23:46:18 2017
@@ -832,7 +832,7 @@ define <4 x double> @test_broadcast_2f64
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-AVX512F-NEXT: vmovaps (%ecx), %xmm0
; X32-AVX512F-NEXT: vmovaps %xmm0, (%eax)
-; X32-AVX512F-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-AVX512F-NEXT: retl
;
; X32-AVX512BW-LABEL: test_broadcast_2f64_4f64_reuse:
@@ -841,7 +841,7 @@ define <4 x double> @test_broadcast_2f64
; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-AVX512BW-NEXT: vmovaps (%ecx), %xmm0
; X32-AVX512BW-NEXT: vmovaps %xmm0, (%eax)
-; X32-AVX512BW-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512BW-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-AVX512BW-NEXT: retl
;
; X32-AVX512DQ-LABEL: test_broadcast_2f64_4f64_reuse:
@@ -850,7 +850,7 @@ define <4 x double> @test_broadcast_2f64
; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-AVX512DQ-NEXT: vmovapd (%ecx), %xmm0
; X32-AVX512DQ-NEXT: vmovapd %xmm0, (%eax)
-; X32-AVX512DQ-NEXT: vinsertf64x2 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-AVX512DQ-NEXT: retl
;
; X64-AVX-LABEL: test_broadcast_2f64_4f64_reuse:
@@ -864,21 +864,21 @@ define <4 x double> @test_broadcast_2f64
; X64-AVX512F: ## BB#0:
; X64-AVX512F-NEXT: vmovaps (%rdi), %xmm0
; X64-AVX512F-NEXT: vmovaps %xmm0, (%rsi)
-; X64-AVX512F-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512F-NEXT: retq
;
; X64-AVX512BW-LABEL: test_broadcast_2f64_4f64_reuse:
; X64-AVX512BW: ## BB#0:
; X64-AVX512BW-NEXT: vmovaps (%rdi), %xmm0
; X64-AVX512BW-NEXT: vmovaps %xmm0, (%rsi)
-; X64-AVX512BW-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512BW-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512BW-NEXT: retq
;
; X64-AVX512DQ-LABEL: test_broadcast_2f64_4f64_reuse:
; X64-AVX512DQ: ## BB#0:
; X64-AVX512DQ-NEXT: vmovapd (%rdi), %xmm0
; X64-AVX512DQ-NEXT: vmovapd %xmm0, (%rsi)
-; X64-AVX512DQ-NEXT: vinsertf64x2 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512DQ-NEXT: retq
%1 = load <2 x double>, <2 x double>* %p0
store <2 x double> %1, <2 x double>* %p1
@@ -896,32 +896,14 @@ define <4 x i64> @test_broadcast_2i64_4i
; X32-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-AVX-NEXT: retl
;
-; X32-AVX512F-LABEL: test_broadcast_2i64_4i64_reuse:
-; X32-AVX512F: ## BB#0:
-; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512F-NEXT: vmovdqa (%ecx), %xmm0
-; X32-AVX512F-NEXT: vmovdqa %xmm0, (%eax)
-; X32-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
-; X32-AVX512F-NEXT: retl
-;
-; X32-AVX512BW-LABEL: test_broadcast_2i64_4i64_reuse:
-; X32-AVX512BW: ## BB#0:
-; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512BW-NEXT: vmovdqa (%ecx), %xmm0
-; X32-AVX512BW-NEXT: vmovdqa %xmm0, (%eax)
-; X32-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
-; X32-AVX512BW-NEXT: retl
-;
-; X32-AVX512DQ-LABEL: test_broadcast_2i64_4i64_reuse:
-; X32-AVX512DQ: ## BB#0:
-; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512DQ-NEXT: vmovdqa (%ecx), %xmm0
-; X32-AVX512DQ-NEXT: vmovdqa %xmm0, (%eax)
-; X32-AVX512DQ-NEXT: vinserti64x2 $1, %xmm0, %ymm0, %ymm0
-; X32-AVX512DQ-NEXT: retl
+; X32-AVX512-LABEL: test_broadcast_2i64_4i64_reuse:
+; X32-AVX512: ## BB#0:
+; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-AVX512-NEXT: vmovdqa (%ecx), %xmm0
+; X32-AVX512-NEXT: vmovdqa %xmm0, (%eax)
+; X32-AVX512-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512-NEXT: retl
;
; X64-AVX-LABEL: test_broadcast_2i64_4i64_reuse:
; X64-AVX: ## BB#0:
@@ -930,26 +912,12 @@ define <4 x i64> @test_broadcast_2i64_4i
; X64-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX-NEXT: retq
;
-; X64-AVX512F-LABEL: test_broadcast_2i64_4i64_reuse:
-; X64-AVX512F: ## BB#0:
-; X64-AVX512F-NEXT: vmovdqa (%rdi), %xmm0
-; X64-AVX512F-NEXT: vmovdqa %xmm0, (%rsi)
-; X64-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
-; X64-AVX512F-NEXT: retq
-;
-; X64-AVX512BW-LABEL: test_broadcast_2i64_4i64_reuse:
-; X64-AVX512BW: ## BB#0:
-; X64-AVX512BW-NEXT: vmovdqa (%rdi), %xmm0
-; X64-AVX512BW-NEXT: vmovdqa %xmm0, (%rsi)
-; X64-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
-; X64-AVX512BW-NEXT: retq
-;
-; X64-AVX512DQ-LABEL: test_broadcast_2i64_4i64_reuse:
-; X64-AVX512DQ: ## BB#0:
-; X64-AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
-; X64-AVX512DQ-NEXT: vmovdqa %xmm0, (%rsi)
-; X64-AVX512DQ-NEXT: vinserti64x2 $1, %xmm0, %ymm0, %ymm0
-; X64-AVX512DQ-NEXT: retq
+; X64-AVX512-LABEL: test_broadcast_2i64_4i64_reuse:
+; X64-AVX512: ## BB#0:
+; X64-AVX512-NEXT: vmovdqa (%rdi), %xmm0
+; X64-AVX512-NEXT: vmovdqa %xmm0, (%rsi)
+; X64-AVX512-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512-NEXT: retq
%1 = load <2 x i64>, <2 x i64>* %p0
store <2 x i64> %1, <2 x i64>* %p1
%2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
@@ -957,37 +925,21 @@ define <4 x i64> @test_broadcast_2i64_4i
}
define <8 x float> @test_broadcast_4f32_8f32_reuse(<4 x float>* %p0, <4 x float>* %p1) {
-; X32-AVX-LABEL: test_broadcast_4f32_8f32_reuse:
-; X32-AVX: ## BB#0:
-; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX-NEXT: vmovaps (%ecx), %xmm0
-; X32-AVX-NEXT: vmovaps %xmm0, (%eax)
-; X32-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X32-AVX-NEXT: retl
-;
-; X32-AVX512-LABEL: test_broadcast_4f32_8f32_reuse:
-; X32-AVX512: ## BB#0:
-; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512-NEXT: vmovaps (%ecx), %xmm0
-; X32-AVX512-NEXT: vmovaps %xmm0, (%eax)
-; X32-AVX512-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm0
-; X32-AVX512-NEXT: retl
-;
-; X64-AVX-LABEL: test_broadcast_4f32_8f32_reuse:
-; X64-AVX: ## BB#0:
-; X64-AVX-NEXT: vmovaps (%rdi), %xmm0
-; X64-AVX-NEXT: vmovaps %xmm0, (%rsi)
-; X64-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X64-AVX-NEXT: retq
-;
-; X64-AVX512-LABEL: test_broadcast_4f32_8f32_reuse:
-; X64-AVX512: ## BB#0:
-; X64-AVX512-NEXT: vmovaps (%rdi), %xmm0
-; X64-AVX512-NEXT: vmovaps %xmm0, (%rsi)
-; X64-AVX512-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm0
-; X64-AVX512-NEXT: retq
+; X32-LABEL: test_broadcast_4f32_8f32_reuse:
+; X32: ## BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: vmovaps (%ecx), %xmm0
+; X32-NEXT: vmovaps %xmm0, (%eax)
+; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_broadcast_4f32_8f32_reuse:
+; X64: ## BB#0:
+; X64-NEXT: vmovaps (%rdi), %xmm0
+; X64-NEXT: vmovaps %xmm0, (%rsi)
+; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-NEXT: retq
%1 = load <4 x float>, <4 x float>* %p0
store <4 x float> %1, <4 x float>* %p1
%2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
@@ -1010,7 +962,7 @@ define <8 x i32> @test_broadcast_4i32_8i
; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-AVX512-NEXT: vmovdqa (%ecx), %xmm0
; X32-AVX512-NEXT: vmovdqa %xmm0, (%eax)
-; X32-AVX512-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; X32-AVX512-NEXT: retl
;
; X64-AVX-LABEL: test_broadcast_4i32_8i32_reuse:
@@ -1024,7 +976,7 @@ define <8 x i32> @test_broadcast_4i32_8i
; X64-AVX512: ## BB#0:
; X64-AVX512-NEXT: vmovdqa (%rdi), %xmm0
; X64-AVX512-NEXT: vmovdqa %xmm0, (%rsi)
-; X64-AVX512-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512-NEXT: retq
%1 = load <4 x i32>, <4 x i32>* %p0
store <4 x i32> %1, <4 x i32>* %p1
@@ -1048,7 +1000,7 @@ define <16 x i16> @test_broadcast_8i16_1
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-AVX512F-NEXT: vmovdqa (%ecx), %xmm0
; X32-AVX512F-NEXT: vmovdqa %xmm0, (%eax)
-; X32-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; X32-AVX512F-NEXT: retl
;
; X32-AVX512BW-LABEL: test_broadcast_8i16_16i16_reuse:
@@ -1057,7 +1009,7 @@ define <16 x i16> @test_broadcast_8i16_1
; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-AVX512BW-NEXT: vmovdqu (%ecx), %xmm0
; X32-AVX512BW-NEXT: vmovdqu %xmm0, (%eax)
-; X32-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; X32-AVX512BW-NEXT: retl
;
; X32-AVX512DQ-LABEL: test_broadcast_8i16_16i16_reuse:
@@ -1066,7 +1018,7 @@ define <16 x i16> @test_broadcast_8i16_1
; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-AVX512DQ-NEXT: vmovdqa (%ecx), %xmm0
; X32-AVX512DQ-NEXT: vmovdqa %xmm0, (%eax)
-; X32-AVX512DQ-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; X32-AVX512DQ-NEXT: retl
;
; X64-AVX-LABEL: test_broadcast_8i16_16i16_reuse:
@@ -1080,21 +1032,21 @@ define <16 x i16> @test_broadcast_8i16_1
; X64-AVX512F: ## BB#0:
; X64-AVX512F-NEXT: vmovdqa (%rdi), %xmm0
; X64-AVX512F-NEXT: vmovdqa %xmm0, (%rsi)
-; X64-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512F-NEXT: retq
;
; X64-AVX512BW-LABEL: test_broadcast_8i16_16i16_reuse:
; X64-AVX512BW: ## BB#0:
; X64-AVX512BW-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX512BW-NEXT: vmovdqu %xmm0, (%rsi)
-; X64-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512BW-NEXT: retq
;
; X64-AVX512DQ-LABEL: test_broadcast_8i16_16i16_reuse:
; X64-AVX512DQ: ## BB#0:
; X64-AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
; X64-AVX512DQ-NEXT: vmovdqa %xmm0, (%rsi)
-; X64-AVX512DQ-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512DQ-NEXT: retq
%1 = load <8 x i16>, <8 x i16> *%p0
store <8 x i16> %1, <8 x i16>* %p1
@@ -1118,7 +1070,7 @@ define <32 x i8> @test_broadcast_16i8_32
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-AVX512F-NEXT: vmovdqa (%ecx), %xmm0
; X32-AVX512F-NEXT: vmovdqa %xmm0, (%eax)
-; X32-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; X32-AVX512F-NEXT: retl
;
; X32-AVX512BW-LABEL: test_broadcast_16i8_32i8_reuse:
@@ -1127,7 +1079,7 @@ define <32 x i8> @test_broadcast_16i8_32
; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-AVX512BW-NEXT: vmovdqu (%ecx), %xmm0
; X32-AVX512BW-NEXT: vmovdqu %xmm0, (%eax)
-; X32-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; X32-AVX512BW-NEXT: retl
;
; X32-AVX512DQ-LABEL: test_broadcast_16i8_32i8_reuse:
@@ -1136,7 +1088,7 @@ define <32 x i8> @test_broadcast_16i8_32
; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-AVX512DQ-NEXT: vmovdqa (%ecx), %xmm0
; X32-AVX512DQ-NEXT: vmovdqa %xmm0, (%eax)
-; X32-AVX512DQ-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; X32-AVX512DQ-NEXT: retl
;
; X64-AVX-LABEL: test_broadcast_16i8_32i8_reuse:
@@ -1150,21 +1102,21 @@ define <32 x i8> @test_broadcast_16i8_32
; X64-AVX512F: ## BB#0:
; X64-AVX512F-NEXT: vmovdqa (%rdi), %xmm0
; X64-AVX512F-NEXT: vmovdqa %xmm0, (%rsi)
-; X64-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512F-NEXT: retq
;
; X64-AVX512BW-LABEL: test_broadcast_16i8_32i8_reuse:
; X64-AVX512BW: ## BB#0:
; X64-AVX512BW-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX512BW-NEXT: vmovdqu %xmm0, (%rsi)
-; X64-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512BW-NEXT: retq
;
; X64-AVX512DQ-LABEL: test_broadcast_16i8_32i8_reuse:
; X64-AVX512DQ: ## BB#0:
; X64-AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
; X64-AVX512DQ-NEXT: vmovdqa %xmm0, (%rsi)
-; X64-AVX512DQ-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512DQ-NEXT: retq
%1 = load <16 x i8>, <16 x i8> *%p0
store <16 x i8> %1, <16 x i8>* %p1
@@ -1194,7 +1146,7 @@ define <8 x i32> @test_broadcast_4i32_8i
; X32-AVX512F-NEXT: vmovdqa (%ecx), %xmm0
; X32-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-AVX512F-NEXT: vmovdqa %xmm1, (%eax)
-; X32-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; X32-AVX512F-NEXT: retl
;
; X32-AVX512BW-LABEL: test_broadcast_4i32_8i32_chain:
@@ -1204,7 +1156,7 @@ define <8 x i32> @test_broadcast_4i32_8i
; X32-AVX512BW-NEXT: vmovdqa (%ecx), %xmm0
; X32-AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-AVX512BW-NEXT: vmovdqa %xmm1, (%eax)
-; X32-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; X32-AVX512BW-NEXT: retl
;
; X32-AVX512DQ-LABEL: test_broadcast_4i32_8i32_chain:
@@ -1214,7 +1166,7 @@ define <8 x i32> @test_broadcast_4i32_8i
; X32-AVX512DQ-NEXT: vmovdqa (%ecx), %xmm0
; X32-AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-AVX512DQ-NEXT: vmovaps %xmm1, (%eax)
-; X32-AVX512DQ-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; X32-AVX512DQ-NEXT: retl
;
; X64-AVX-LABEL: test_broadcast_4i32_8i32_chain:
@@ -1230,7 +1182,7 @@ define <8 x i32> @test_broadcast_4i32_8i
; X64-AVX512F-NEXT: vmovdqa (%rdi), %xmm0
; X64-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-AVX512F-NEXT: vmovdqa %xmm1, (%rsi)
-; X64-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512F-NEXT: retq
;
; X64-AVX512BW-LABEL: test_broadcast_4i32_8i32_chain:
@@ -1238,7 +1190,7 @@ define <8 x i32> @test_broadcast_4i32_8i
; X64-AVX512BW-NEXT: vmovdqa (%rdi), %xmm0
; X64-AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-AVX512BW-NEXT: vmovdqa %xmm1, (%rsi)
-; X64-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512BW-NEXT: retq
;
; X64-AVX512DQ-LABEL: test_broadcast_4i32_8i32_chain:
@@ -1246,7 +1198,7 @@ define <8 x i32> @test_broadcast_4i32_8i
; X64-AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
; X64-AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-AVX512DQ-NEXT: vmovaps %xmm1, (%rsi)
-; X64-AVX512DQ-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512DQ-NEXT: retq
%1 = load <4 x i32>, <4 x i32>* %p0
store <4 x float> zeroinitializer, <4 x float>* %p1
Modified: llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll?rev=290869&r1=290868&r2=290869&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll Mon Jan 2 23:46:18 2017
@@ -204,7 +204,7 @@ define <4 x i64> @fptosi_4f64_to_4i64(<4
;
; AVX512VL-LABEL: fptosi_4f64_to_4i64:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512VL-NEXT: vcvttsd2si %xmm1, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm2
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
@@ -217,7 +217,7 @@ define <4 x i64> @fptosi_4f64_to_4i64(<4
; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm0
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptosi_4f64_to_4i64:
@@ -719,7 +719,7 @@ define <4 x i64> @fptoui_4f64_to_4i64(<4
;
; AVX512VL-LABEL: fptoui_4f64_to_4i64:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512VL-NEXT: vcvttsd2usi %xmm1, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm2
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
@@ -732,7 +732,7 @@ define <4 x i64> @fptoui_4f64_to_4i64(<4
; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm0
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptoui_4f64_to_4i64:
@@ -1097,7 +1097,7 @@ define <4 x i64> @fptosi_4f32_to_4i64(<8
; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm0
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptosi_4f32_to_4i64:
@@ -1205,7 +1205,7 @@ define <4 x i64> @fptosi_8f32_to_4i64(<8
; AVX512VL-NEXT: vmovq %rcx, %xmm1
; AVX512VL-NEXT: vmovq %rax, %xmm2
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm1, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptosi_8f32_to_4i64:
@@ -1822,7 +1822,7 @@ define <4 x i64> @fptoui_4f32_to_4i64(<8
; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm0
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptoui_4f32_to_4i64:
@@ -2000,7 +2000,7 @@ define <4 x i64> @fptoui_8f32_to_4i64(<8
; AVX512VL-NEXT: vmovq %rcx, %xmm1
; AVX512VL-NEXT: vmovq %rax, %xmm2
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm1, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptoui_8f32_to_4i64:
@@ -2409,125 +2409,29 @@ define <4 x i32> @fptosi_2f128_to_4i32(<
; SSE-NEXT: popq %r14
; SSE-NEXT: retq
;
-; VEX-LABEL: fptosi_2f128_to_4i32:
-; VEX: # BB#0:
-; VEX-NEXT: pushq %r14
-; VEX-NEXT: pushq %rbx
-; VEX-NEXT: subq $24, %rsp
-; VEX-NEXT: movq %rsi, %r14
-; VEX-NEXT: movq %rdi, %rbx
-; VEX-NEXT: movq %rdx, %rdi
-; VEX-NEXT: movq %rcx, %rsi
-; VEX-NEXT: callq __fixtfdi
-; VEX-NEXT: vmovq %rax, %xmm0
-; VEX-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
-; VEX-NEXT: movq %rbx, %rdi
-; VEX-NEXT: movq %r14, %rsi
-; VEX-NEXT: callq __fixtfdi
-; VEX-NEXT: vmovq %rax, %xmm0
-; VEX-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
-; VEX-NEXT: # xmm0 = xmm0[0],mem[0]
-; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
-; VEX-NEXT: addq $24, %rsp
-; VEX-NEXT: popq %rbx
-; VEX-NEXT: popq %r14
-; VEX-NEXT: retq
-;
-; AVX512F-LABEL: fptosi_2f128_to_4i32:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: pushq %r14
-; AVX512F-NEXT: pushq %rbx
-; AVX512F-NEXT: subq $24, %rsp
-; AVX512F-NEXT: movq %rsi, %r14
-; AVX512F-NEXT: movq %rdi, %rbx
-; AVX512F-NEXT: movq %rdx, %rdi
-; AVX512F-NEXT: movq %rcx, %rsi
-; AVX512F-NEXT: callq __fixtfdi
-; AVX512F-NEXT: vmovq %rax, %xmm0
-; AVX512F-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
-; AVX512F-NEXT: movq %rbx, %rdi
-; AVX512F-NEXT: movq %r14, %rsi
-; AVX512F-NEXT: callq __fixtfdi
-; AVX512F-NEXT: vmovq %rax, %xmm0
-; AVX512F-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
-; AVX512F-NEXT: # xmm0 = xmm0[0],mem[0]
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
-; AVX512F-NEXT: addq $24, %rsp
-; AVX512F-NEXT: popq %rbx
-; AVX512F-NEXT: popq %r14
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: fptosi_2f128_to_4i32:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: pushq %r14
-; AVX512VL-NEXT: pushq %rbx
-; AVX512VL-NEXT: subq $24, %rsp
-; AVX512VL-NEXT: movq %rsi, %r14
-; AVX512VL-NEXT: movq %rdi, %rbx
-; AVX512VL-NEXT: movq %rdx, %rdi
-; AVX512VL-NEXT: movq %rcx, %rsi
-; AVX512VL-NEXT: callq __fixtfdi
-; AVX512VL-NEXT: vmovq %rax, %xmm0
-; AVX512VL-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
-; AVX512VL-NEXT: movq %rbx, %rdi
-; AVX512VL-NEXT: movq %r14, %rsi
-; AVX512VL-NEXT: callq __fixtfdi
-; AVX512VL-NEXT: vmovq %rax, %xmm0
-; AVX512VL-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
-; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0]
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
-; AVX512VL-NEXT: addq $24, %rsp
-; AVX512VL-NEXT: popq %rbx
-; AVX512VL-NEXT: popq %r14
-; AVX512VL-NEXT: retq
-;
-; AVX512DQ-LABEL: fptosi_2f128_to_4i32:
-; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: pushq %r14
-; AVX512DQ-NEXT: pushq %rbx
-; AVX512DQ-NEXT: subq $24, %rsp
-; AVX512DQ-NEXT: movq %rsi, %r14
-; AVX512DQ-NEXT: movq %rdi, %rbx
-; AVX512DQ-NEXT: movq %rdx, %rdi
-; AVX512DQ-NEXT: movq %rcx, %rsi
-; AVX512DQ-NEXT: callq __fixtfdi
-; AVX512DQ-NEXT: vmovq %rax, %xmm0
-; AVX512DQ-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
-; AVX512DQ-NEXT: movq %rbx, %rdi
-; AVX512DQ-NEXT: movq %r14, %rsi
-; AVX512DQ-NEXT: callq __fixtfdi
-; AVX512DQ-NEXT: vmovq %rax, %xmm0
-; AVX512DQ-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
-; AVX512DQ-NEXT: # xmm0 = xmm0[0],mem[0]
-; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
-; AVX512DQ-NEXT: addq $24, %rsp
-; AVX512DQ-NEXT: popq %rbx
-; AVX512DQ-NEXT: popq %r14
-; AVX512DQ-NEXT: retq
-;
-; AVX512VLDQ-LABEL: fptosi_2f128_to_4i32:
-; AVX512VLDQ: # BB#0:
-; AVX512VLDQ-NEXT: pushq %r14
-; AVX512VLDQ-NEXT: pushq %rbx
-; AVX512VLDQ-NEXT: subq $24, %rsp
-; AVX512VLDQ-NEXT: movq %rsi, %r14
-; AVX512VLDQ-NEXT: movq %rdi, %rbx
-; AVX512VLDQ-NEXT: movq %rdx, %rdi
-; AVX512VLDQ-NEXT: movq %rcx, %rsi
-; AVX512VLDQ-NEXT: callq __fixtfdi
-; AVX512VLDQ-NEXT: vmovq %rax, %xmm0
-; AVX512VLDQ-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
-; AVX512VLDQ-NEXT: movq %rbx, %rdi
-; AVX512VLDQ-NEXT: movq %r14, %rsi
-; AVX512VLDQ-NEXT: callq __fixtfdi
-; AVX512VLDQ-NEXT: vmovq %rax, %xmm0
-; AVX512VLDQ-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
-; AVX512VLDQ-NEXT: # xmm0 = xmm0[0],mem[0]
-; AVX512VLDQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
-; AVX512VLDQ-NEXT: addq $24, %rsp
-; AVX512VLDQ-NEXT: popq %rbx
-; AVX512VLDQ-NEXT: popq %r14
-; AVX512VLDQ-NEXT: retq
+; AVX-LABEL: fptosi_2f128_to_4i32:
+; AVX: # BB#0:
+; AVX-NEXT: pushq %r14
+; AVX-NEXT: pushq %rbx
+; AVX-NEXT: subq $24, %rsp
+; AVX-NEXT: movq %rsi, %r14
+; AVX-NEXT: movq %rdi, %rbx
+; AVX-NEXT: movq %rdx, %rdi
+; AVX-NEXT: movq %rcx, %rsi
+; AVX-NEXT: callq __fixtfdi
+; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: movq %rbx, %rdi
+; AVX-NEXT: movq %r14, %rsi
+; AVX-NEXT: callq __fixtfdi
+; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; AVX-NEXT: addq $24, %rsp
+; AVX-NEXT: popq %rbx
+; AVX-NEXT: popq %r14
+; AVX-NEXT: retq
%cvt = fptosi <2 x fp128> %a to <2 x i32>
%ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i32> %ext
Modified: llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll?rev=290869&r1=290868&r2=290869&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll Mon Jan 2 23:46:18 2017
@@ -288,7 +288,7 @@ define <4 x double> @sitofp_4i64_to_4f64
;
; AVX512VL-LABEL: sitofp_4i64_to_4f64:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax
; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
; AVX512VL-NEXT: vmovq %xmm1, %rax
@@ -299,7 +299,7 @@ define <4 x double> @sitofp_4i64_to_4f64
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: sitofp_4i64_to_4f64:
@@ -821,7 +821,7 @@ define <4 x double> @uitofp_4i64_to_4f64
;
; AVX512VL-LABEL: uitofp_4i64_to_4f64:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax
; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2
; AVX512VL-NEXT: vmovq %xmm1, %rax
@@ -832,7 +832,7 @@ define <4 x double> @uitofp_4i64_to_4f64
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm0
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: uitofp_4i64_to_4f64:
@@ -1430,7 +1430,7 @@ define <4 x float> @sitofp_4i64_to_4f32(
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
@@ -2344,7 +2344,7 @@ define <4 x float> @uitofp_4i64_to_4f32(
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm2
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm2
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
@@ -2775,7 +2775,7 @@ define <4 x double> @sitofp_load_4i64_to
; AVX512VL-LABEL: sitofp_load_4i64_to_4f64:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax
; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
; AVX512VL-NEXT: vmovq %xmm1, %rax
@@ -2786,7 +2786,7 @@ define <4 x double> @sitofp_load_4i64_to
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: sitofp_load_4i64_to_4f64:
@@ -3190,7 +3190,7 @@ define <4 x double> @uitofp_load_4i64_to
; AVX512VL-LABEL: uitofp_load_4i64_to_4f64:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax
; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2
; AVX512VL-NEXT: vmovq %xmm1, %rax
@@ -3201,7 +3201,7 @@ define <4 x double> @uitofp_load_4i64_to
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm0
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: uitofp_load_4i64_to_4f64:
@@ -3426,7 +3426,7 @@ define <4 x float> @sitofp_load_4i64_to_
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
@@ -3667,7 +3667,7 @@ define <8 x float> @sitofp_load_8i64_to_
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm0
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: sitofp_load_8i64_to_8f32:
@@ -4013,7 +4013,7 @@ define <4 x float> @uitofp_load_4i64_to_
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm2
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm2
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
@@ -4593,7 +4593,7 @@ define <8 x float> @uitofp_load_8i64_to_
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm0
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: uitofp_load_8i64_to_8f32:
Modified: llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll?rev=290869&r1=290868&r2=290869&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll Mon Jan 2 23:46:18 2017
@@ -461,7 +461,7 @@ define <8 x float> @cvt_8i16_to_8f32(<8
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm4, %ymm0, %ymm0
+; AVX512VL-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
; AVX512VL-NEXT: retq
%1 = bitcast <8 x i16> %a0 to <8 x half>
%2 = fpext <8 x half> %1 to <8 x float>
@@ -757,7 +757,7 @@ define <16 x float> @cvt_16i16_to_16f32(
;
; AVX512VL-LABEL: cvt_16i16_to_16f32:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm10
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm10
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: movq %rax, %rcx
; AVX512VL-NEXT: shrq $48, %rcx
@@ -840,14 +840,14 @@ define <16 x float> @cvt_16i16_to_16f32(
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm7[0],xmm5[0],xmm7[2,3]
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1],xmm0[0],xmm2[3]
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm4[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm16[0],xmm15[0],xmm16[2,3]
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm14[0],xmm1[3]
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm13[0]
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm12[0],xmm11[0],xmm12[2,3]
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm9[0],xmm2[3]
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm8[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm2, %ymm1
+; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX512VL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512VL-NEXT: retq
%1 = bitcast <16 x i16> %a0 to <16 x half>
@@ -1227,7 +1227,7 @@ define <8 x float> @load_cvt_8i16_to_8f3
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm4, %ymm0, %ymm0
+; AVX512VL-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
; AVX512VL-NEXT: retq
%1 = load <8 x i16>, <8 x i16>* %a0
%2 = bitcast <8 x i16> %1 to <8 x half>
@@ -1491,14 +1491,14 @@ define <16 x float> @load_cvt_16i16_to_1
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm4, %ymm0, %ymm0
+; AVX512VL-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm14[0],xmm15[0],xmm14[2,3]
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm13[0],xmm1[3]
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm12[0]
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm10[0],xmm11[0],xmm10[2,3]
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm9[0],xmm2[3]
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm8[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm2, %ymm1
+; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX512VL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512VL-NEXT: retq
%1 = load <16 x i16>, <16 x i16>* %a0
@@ -1738,7 +1738,7 @@ define <4 x double> @cvt_4i16_to_4f64(<4
; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: retq
%1 = bitcast <4 x i16> %a0 to <4 x half>
%2 = fpext <4 x half> %1 to <4 x double>
@@ -1929,7 +1929,7 @@ define <4 x double> @cvt_8i16_to_4f64(<8
; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: retq
%1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%2 = bitcast <4 x i16> %1 to <4 x half>
@@ -2145,14 +2145,14 @@ define <8 x double> @cvt_8i16_to_8f64(<8
; AVX512VL-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5
; AVX512VL-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm4 = xmm5[0],xmm4[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm6, %ymm4, %ymm4
+; AVX512VL-NEXT: vinsertf128 $1, %xmm6, %ymm4, %ymm4
; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: vinsertf64x4 $1, %ymm4, %zmm0, %zmm0
; AVX512VL-NEXT: retq
%1 = bitcast <8 x i16> %a0 to <8 x half>
@@ -2350,7 +2350,7 @@ define <4 x double> @load_cvt_4i16_to_4f
; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: retq
%1 = load <4 x i16>, <4 x i16>* %a0
%2 = bitcast <4 x i16> %1 to <4 x half>
@@ -2474,7 +2474,7 @@ define <4 x double> @load_cvt_8i16_to_4f
; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: retq
%1 = load <8 x i16>, <8 x i16>* %a0
%2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -2643,14 +2643,14 @@ define <8 x double> @load_cvt_8i16_to_8f
; AVX512VL-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5
; AVX512VL-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm4 = xmm4[0],xmm5[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm6, %ymm4, %ymm4
+; AVX512VL-NEXT: vinsertf128 $1, %xmm6, %ymm4, %ymm4
; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: vinsertf64x4 $1, %ymm4, %zmm0, %zmm0
; AVX512VL-NEXT: retq
%1 = load <8 x i16>, <8 x i16>* %a0
@@ -3182,7 +3182,7 @@ define <8 x i16> @cvt_8f32_to_8i16(<8 x
; AVX512VL-NEXT: orl %edx, %eax
; AVX512VL-NEXT: shlq $32, %rax
; AVX512VL-NEXT: orq %rcx, %rax
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
; AVX512VL-NEXT: vmovd %xmm1, %ecx
@@ -3427,7 +3427,7 @@ define <16 x i16> @cvt_16f32_to_16i16(<1
; AVX512VL-NEXT: vcvtps2ph $4, %xmm2, %xmm2
; AVX512VL-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; AVX512VL-NEXT: vmovd %xmm2, %eax
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm1, %xmm2
+; AVX512VL-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
; AVX512VL-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
@@ -3458,7 +3458,7 @@ define <16 x i16> @cvt_16f32_to_16i16(<1
; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
; AVX512VL-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; AVX512VL-NEXT: vmovd %xmm1, %eax
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
@@ -3479,7 +3479,7 @@ define <16 x i16> @cvt_16f32_to_16i16(<1
; AVX512VL-NEXT: vpinsrw $6, %eax, %xmm3, %xmm1
; AVX512VL-NEXT: vmovd %xmm0, %eax
; AVX512VL-NEXT: vpinsrw $7, %eax, %xmm1, %xmm0
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: retq
%1 = fptrunc <16 x float> %a0 to <16 x half>
%2 = bitcast <16 x half> %1 to <16 x i16>
@@ -3958,7 +3958,7 @@ define void @store_cvt_8f32_to_8i16(<8 x
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
; AVX512VL-NEXT: vmovd %xmm1, %r10d
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX512VL-NEXT: vcvtps2ph $4, %xmm2, %xmm2
; AVX512VL-NEXT: vmovd %xmm2, %r11d
@@ -4191,9 +4191,9 @@ define void @store_cvt_16f32_to_16i16(<1
;
; AVX512VL-LABEL: store_cvt_16f32_to_16i16:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512VL-NEXT: vextractf64x4 $1, %zmm0, %ymm2
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm2, %xmm3
+; AVX512VL-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX512VL-NEXT: vcvtps2ph $4, %xmm3, %xmm4
; AVX512VL-NEXT: vmovd %xmm4, %eax
; AVX512VL-NEXT: vcvtps2ph $4, %xmm2, %xmm4
@@ -4422,7 +4422,7 @@ define <4 x i16> @cvt_4f64_to_4i16(<4 x
; AVX512VL-NEXT: movzwl %ax, %r14d
; AVX512VL-NEXT: orl %ebx, %r14d
; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
@@ -4572,7 +4572,7 @@ define <8 x i16> @cvt_4f64_to_8i16_undef
; AVX512VL-NEXT: movzwl %ax, %r14d
; AVX512VL-NEXT: orl %ebx, %r14d
; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
@@ -4726,7 +4726,7 @@ define <8 x i16> @cvt_4f64_to_8i16_zero(
; AVX512VL-NEXT: movzwl %ax, %r14d
; AVX512VL-NEXT: orl %ebx, %r14d
; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
@@ -4969,7 +4969,7 @@ define <8 x i16> @cvt_8f64_to_8i16(<8 x
; AVX512VL-NEXT: movzwl %ax, %r15d
; AVX512VL-NEXT: orl %ebx, %r15d
; AVX512VL-NEXT: vmovups (%rsp), %zmm0 # 64-byte Reload
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
@@ -4994,7 +4994,7 @@ define <8 x i16> @cvt_8f64_to_8i16(<8 x
; AVX512VL-NEXT: movzwl %ax, %r15d
; AVX512VL-NEXT: orl %ebx, %r15d
; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
@@ -5188,7 +5188,7 @@ define void @store_cvt_4f64_to_4i16(<4 x
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movl %eax, %r14d
; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
@@ -5357,7 +5357,7 @@ define void @store_cvt_4f64_to_8i16_unde
; AVX512VL-NEXT: movzwl %ax, %ebx
; AVX512VL-NEXT: orl %ebp, %ebx
; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
@@ -5528,7 +5528,7 @@ define void @store_cvt_4f64_to_8i16_zero
; AVX512VL-NEXT: movzwl %ax, %ebx
; AVX512VL-NEXT: orl %ebp, %ebx
; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
@@ -5775,7 +5775,7 @@ define void @store_cvt_8f64_to_8i16(<8 x
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
@@ -5787,7 +5787,7 @@ define void @store_cvt_8f64_to_8i16(<8 x
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movl %eax, %r12d
; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
Modified: llvm/trunk/test/CodeGen/X86/vector-lzcnt-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-lzcnt-256.ll?rev=290869&r1=290868&r2=290869&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-lzcnt-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-lzcnt-256.ll Mon Jan 2 23:46:18 2017
@@ -710,35 +710,20 @@ define <32 x i8> @testv32i8(<32 x i8> %i
; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
-; AVX512VLCD-LABEL: testv32i8:
-; AVX512VLCD: ## BB#0:
-; AVX512VLCD-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VLCD-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512VLCD-NEXT: vplzcntd %zmm1, %zmm1
-; AVX512VLCD-NEXT: vpmovdb %zmm1, %xmm1
-; AVX512VLCD-NEXT: vmovdqa {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
-; AVX512VLCD-NEXT: vpsubb %xmm2, %xmm1, %xmm1
-; AVX512VLCD-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512VLCD-NEXT: vplzcntd %zmm0, %zmm0
-; AVX512VLCD-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512VLCD-NEXT: vpsubb %xmm2, %xmm0, %xmm0
-; AVX512VLCD-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VLCD-NEXT: retq
-;
-; AVX512CD-LABEL: testv32i8:
-; AVX512CD: ## BB#0:
-; AVX512CD-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512CD-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512CD-NEXT: vplzcntd %zmm1, %zmm1
-; AVX512CD-NEXT: vpmovdb %zmm1, %xmm1
-; AVX512CD-NEXT: vmovdqa {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
-; AVX512CD-NEXT: vpsubb %xmm2, %xmm1, %xmm1
-; AVX512CD-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
-; AVX512CD-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512CD-NEXT: vpsubb %xmm2, %xmm0, %xmm0
-; AVX512CD-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512CD-NEXT: retq
+; AVX512-LABEL: testv32i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512-NEXT: vplzcntd %zmm1, %zmm1
+; AVX512-NEXT: vpmovdb %zmm1, %xmm1
+; AVX512-NEXT: vmovdqa {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
+; AVX512-NEXT: vpsubb %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; AVX512-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vpsubb %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
;
; X32-AVX-LABEL: testv32i8:
; X32-AVX: # BB#0:
@@ -799,35 +784,20 @@ define <32 x i8> @testv32i8u(<32 x i8> %
; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
-; AVX512VLCD-LABEL: testv32i8u:
-; AVX512VLCD: ## BB#0:
-; AVX512VLCD-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VLCD-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512VLCD-NEXT: vplzcntd %zmm1, %zmm1
-; AVX512VLCD-NEXT: vpmovdb %zmm1, %xmm1
-; AVX512VLCD-NEXT: vmovdqa {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
-; AVX512VLCD-NEXT: vpsubb %xmm2, %xmm1, %xmm1
-; AVX512VLCD-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512VLCD-NEXT: vplzcntd %zmm0, %zmm0
-; AVX512VLCD-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512VLCD-NEXT: vpsubb %xmm2, %xmm0, %xmm0
-; AVX512VLCD-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VLCD-NEXT: retq
-;
-; AVX512CD-LABEL: testv32i8u:
-; AVX512CD: ## BB#0:
-; AVX512CD-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512CD-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512CD-NEXT: vplzcntd %zmm1, %zmm1
-; AVX512CD-NEXT: vpmovdb %zmm1, %xmm1
-; AVX512CD-NEXT: vmovdqa {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
-; AVX512CD-NEXT: vpsubb %xmm2, %xmm1, %xmm1
-; AVX512CD-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
-; AVX512CD-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512CD-NEXT: vpsubb %xmm2, %xmm0, %xmm0
-; AVX512CD-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512CD-NEXT: retq
+; AVX512-LABEL: testv32i8u:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512-NEXT: vplzcntd %zmm1, %zmm1
+; AVX512-NEXT: vpmovdb %zmm1, %xmm1
+; AVX512-NEXT: vmovdqa {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
+; AVX512-NEXT: vpsubb %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; AVX512-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vpsubb %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
;
; X32-AVX-LABEL: testv32i8u:
; X32-AVX: # BB#0:
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll?rev=290869&r1=290868&r2=290869&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll Mon Jan 2 23:46:18 2017
@@ -1789,25 +1789,15 @@ define <16 x i16> @shuffle_v16i16_00_01_
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7]
-; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7]
-; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2OR512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7]
+; AVX2OR512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7]
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 11, i32 8, i32 9, i32 8, i32 9, i32 10, i32 11, i32 10, i32 11>
ret <16 x i16> %shuffle
}
@@ -1822,23 +1812,14 @@ define <16 x i16> @shuffle_v16i16_06_07_
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 9, i32 14, i32 15, i32 12, i32 13, i32 10, i32 11, i32 8, i32 9>
ret <16 x i16> %shuffle
}
@@ -1885,23 +1866,14 @@ define <16 x i16> @shuffle_v16i16_00_00_
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
-; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
-; AVX512VL-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
+; AVX2OR512VL-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
ret <16 x i16> %shuffle
}
@@ -1919,29 +1891,17 @@ define <16 x i16> @shuffle_v16i16_00_00_
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,7]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,7]
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX2OR512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; AVX2OR512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,7]
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2OR512VL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; AVX2OR512VL-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 12, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
ret <16 x i16> %shuffle
}
@@ -1957,25 +1917,15 @@ define <16 x i16> @shuffle_v16i16_uu_00_
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
-; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
-; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
-; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm2, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2OR512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2OR512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
+; AVX2OR512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 11, i32 undef, i32 8, i32 undef, i32 9, i32 undef, i32 10, i32 undef, i32 11>
ret <16 x i16> %shuffle
}
@@ -1991,25 +1941,15 @@ define <16 x i16> @shuffle_v16i16_uu_04_
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
-; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
-; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX512VL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
-; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm2, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX2OR512VL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX2OR512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
+; AVX2OR512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 15, i32 undef, i32 12, i32 undef, i32 13, i32 undef, i32 14, i32 undef, i32 15>
ret <16 x i16> %shuffle
}
@@ -2026,27 +1966,16 @@ define <16 x i16> @shuffle_v16i16_03_01_
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,0,4,5,6,7]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,0,4,5,6,7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
+; AVX2OR512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
+; AVX2OR512VL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,0,4,5,6,7]
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 13, i32 11, i32 9, i32 10, i32 8, i32 14, i32 15, i32 12, i32 13>
ret <16 x i16> %shuffle
}
@@ -2062,25 +1991,15 @@ define <16 x i16> @shuffle_v16i16_04_04_
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,14,15]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpbroadcastw %xmm1, %xmm2
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,14,15]
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpbroadcastw %xmm1, %xmm2
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,14,15]
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 8, i32 12, i32 12, i32 12, i32 12, i32 8, i32 8, i32 8, i32 8>
ret <16 x i16> %shuffle
}
@@ -2095,23 +2014,14 @@ define <16 x i16> @shuffle_v16i16_02_03_
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,3,2]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,3,2]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,3,2]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 13, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
ret <16 x i16> %shuffle
}
@@ -2128,27 +2038,16 @@ define <16 x i16> @shuffle_v16i16_02_03_
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,3,0,2,4,5,6,7]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,3,0,2,4,5,6,7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
+; AVX2OR512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
+; AVX2OR512VL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,3,0,2,4,5,6,7]
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 13, i32 10, i32 11, i32 8, i32 10, i32 14, i32 15, i32 12, i32 13>
ret <16 x i16> %shuffle
}
@@ -2164,25 +2063,15 @@ define <16 x i16> @shuffle_v16i16_02_03_
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,2,3]
-; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,2,3]
-; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,2,3]
+; AVX2OR512VL-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 15, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 15>
ret <16 x i16> %shuffle
}
@@ -2210,12 +2099,12 @@ define <16 x i16> @shuffle_v16i16_07_05_
;
; AVX512VL-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = [14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 8, i32 15, i32 13, i32 14, i32 12, i32 11, i32 9, i32 10, i32 8>
ret <16 x i16> %shuffle
@@ -2232,25 +2121,15 @@ define <16 x i16> @shuffle_v16i16_01_00_
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,2,3]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpbroadcastw %xmm1, %xmm2
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,2,3]
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpbroadcastw %xmm1, %xmm2
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,2,3]
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 8, i32 9, i32 8, i32 13, i32 12, i32 13, i32 12, i32 9, i32 8>
ret <16 x i16> %shuffle
}
@@ -2266,25 +2145,15 @@ define <16 x i16> @shuffle_v16i16_05_04_
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,2,3]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpbroadcastw %xmm1, %xmm2
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,2,3]
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpbroadcastw %xmm1, %xmm2
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,2,3]
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 8, i32 13, i32 12, i32 9, i32 8, i32 13, i32 12, i32 9, i32 8>
ret <16 x i16> %shuffle
}
@@ -2300,25 +2169,15 @@ define <16 x i16> @shuffle_v16i16_05_04_
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,2,3]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,2,3]
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,2,3]
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 12, i32 13, i32 12, i32 9, i32 8, i32 9, i32 8, i32 13, i32 12>
ret <16 x i16> %shuffle
}
@@ -2334,25 +2193,15 @@ define <16 x i16> @shuffle_v16i16_00_04_
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,2,3]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpbroadcastw %xmm1, %xmm2
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,2,3]
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpbroadcastw %xmm1, %xmm2
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,2,3]
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 8>
ret <16 x i16> %shuffle
}
@@ -2368,25 +2217,15 @@ define <16 x i16> @shuffle_v16i16_04_00_
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,2,3]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,2,3]
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,2,3]
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 12>
ret <16 x i16> %shuffle
}
@@ -2414,12 +2253,12 @@ define <16 x i16> @shuffle_v16i16_02_06_
;
; AVX512VL-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 11, i32 10, i32 14, i32 12, i32 8, i32 13, i32 9, i32 15, i32 11>
ret <16 x i16> %shuffle
@@ -2448,12 +2287,12 @@ define <16 x i16> @shuffle_v16i16_02_00_
;
; AVX512VL-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = [4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 11, i32 10, i32 8, i32 14, i32 12, i32 13, i32 9, i32 15, i32 11>
ret <16 x i16> %shuffle
@@ -2482,12 +2321,12 @@ define <16 x i16> @shuffle_v16i16_02_06_
;
; AVX512VL-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 13, i32 10, i32 14, i32 12, i32 8, i32 9, i32 11, i32 15, i32 13>
ret <16 x i16> %shuffle
@@ -2516,12 +2355,12 @@ define <16 x i16> @shuffle_v16i16_06_06_
;
; AVX512VL-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = [12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 11, i32 14, i32 14, i32 15, i32 13, i32 9, i32 14, i32 12, i32 11>
ret <16 x i16> %shuffle
@@ -2538,25 +2377,15 @@ define <16 x i16> @shuffle_v16i16_00_00_
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,14,15]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,14,15]
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,14,15]
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 12, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
ret <16 x i16> %shuffle
}
@@ -2572,25 +2401,15 @@ define <16 x i16> @shuffle_v16i16_04_04_
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,14,15]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,14,15]
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,14,15]
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 12, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
ret <16 x i16> %shuffle
}
@@ -2606,25 +2425,15 @@ define <16 x i16> @shuffle_v16i16_00_04_
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 12, i32 8, i32 12, i32 12, i32 8, i32 12, i32 12, i32 12, i32 12>
ret <16 x i16> %shuffle
}
@@ -2640,25 +2449,15 @@ define <16 x i16> @shuffle_v16i16_00_04_
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,14,15]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpbroadcastw %xmm1, %xmm2
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,14,15]
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpbroadcastw %xmm1, %xmm2
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,14,15]
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 8, i32 8, i32 8>
ret <16 x i16> %shuffle
}
@@ -2675,27 +2474,16 @@ define <16 x i16> @shuffle_v16i16_00_04_
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,0,4,5,6,7]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,0,4,5,6,7]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX2OR512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; AVX2OR512VL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,0,4,5,6,7]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 15, i32 8, i32 12, i32 12, i32 8, i32 12, i32 13, i32 14, i32 15>
ret <16 x i16> %shuffle
}
@@ -2711,25 +2499,15 @@ define <16 x i16> @shuffle_v16i16_00_uu_
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,14,15]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,14,15]
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,14,15]
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 12, i32 8, i32 undef, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
ret <16 x i16> %shuffle
}
@@ -2745,25 +2523,15 @@ define <16 x i16> @shuffle_v16i16_04_04_
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 12, i32 12, i32 12, i32 undef, i32 8, i32 12, i32 12, i32 12, i32 12>
ret <16 x i16> %shuffle
}
@@ -2779,25 +2547,15 @@ define <16 x i16> @shuffle_v16i16_uu_04_
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 12, i32 undef, i32 12, i32 12, i32 8, i32 12, i32 12, i32 12, i32 12>
ret <16 x i16> %shuffle
}
@@ -2848,13 +2606,13 @@ define <16 x i16> @shuffle_v16i16_uu_uu_
;
; AVX512VL-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm2
; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,1,2,0]
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,3,2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 11>
ret <16 x i16> %shuffle
@@ -2926,7 +2684,7 @@ define <16 x i16> @shuffle_v16i16_00_01_
;
; AVX512VL-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
@@ -2934,7 +2692,7 @@ define <16 x i16> @shuffle_v16i16_00_01_
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,1,3]
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,5,4,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,3,1,2]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 11, i32 8, i32 9, i32 10, i32 15, i32 12, i32 13, i32 14, i32 11>
ret <16 x i16> %shuffle
@@ -2961,13 +2719,13 @@ define <16 x i16> @shuffle_v16i16_04_05_
;
; AVX512VL-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,2,3]
; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,1,2,0]
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,3,2,1,4,5,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,0,3,1]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 15, i32 12, i32 13, i32 14, i32 11, i32 8, i32 9, i32 10, i32 15>
ret <16 x i16> %shuffle
@@ -2996,12 +2754,12 @@ define <16 x i16> @shuffle_v16i16_03_07_
;
; AVX512VL-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = [6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 13, i32 11, i32 15, i32 9, i32 8, i32 10, i32 15, i32 11, i32 13>
ret <16 x i16> %shuffle
@@ -3693,23 +3451,14 @@ define <16 x i16> @shuffle_v16i16_05_06_
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
-; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
-; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
-; AVX512VL-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
-; AVX512VL-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
+; AVX2OR512VL-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
+; AVX2OR512VL-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12>
ret <16 x i16> %shuffle
}
@@ -3809,23 +3558,14 @@ define <16 x i16> @shuffle_v16i16_03_04_
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
-; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
-; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
-; AVX512VL-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
-; AVX512VL-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
+; AVX2OR512VL-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
+; AVX2OR512VL-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10>
ret <16 x i16> %shuffle
}
@@ -4000,17 +3740,11 @@ define <16 x i16> @shuffle_v16i16_u_u_u_
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19>
ret <16 x i16> %shuffle
}
@@ -4023,19 +3757,12 @@ define <16 x i16> @shuffle_v16i16_u_u_u_
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
ret <16 x i16> %shuffle
}
@@ -4049,17 +3776,11 @@ define <16 x i16> @shuffle_v16i16_8_8_8_
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm0
-; AVX512VL-NEXT: vpbroadcastw %xmm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX2OR512VL-NEXT: vpbroadcastw %xmm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
ret <16 x i16> %shuffle
}
@@ -4091,19 +3812,12 @@ define <16 x i16> @shuffle_v16i16_9_9_9_
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm0
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX2OR512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <16 x i16> %shuffle
}
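Every hunk above follows the same pattern: under AVX-512VL the backend previously emitted the EVEX-encoded vextracti32x4/vinserti32x4 forms, and with this change the pass rewrites them to the shorter VEX-encoded vextracti128/vinserti128. As a rough sketch of how the compression table extended in X86InstrTablesInfo.h can be consulted — the helper name and the linear scan are illustrative, and the EvexOpcode/VexOpcode field names are assumed from the initializer pairs; the in-tree pass may organize the lookup differently:

#include "llvm/ADT/ArrayRef.h"

// Sketch: map an EVEX Z256 opcode to its VEX equivalent, if the table
// provides one. Returns 0 when the instruction has no VEX form and must
// keep its EVEX encoding.
static unsigned getVexOpcode(unsigned EvexOpc,
    llvm::ArrayRef<X86EvexToVexCompressTableEntry> Table) {
  for (const auto &Entry : Table)
    if (Entry.EvexOpcode == EvexOpc)
      return Entry.VexOpcode; // e.g. VEXTRACTI32x4Z256rr -> VEXTRACTI128rr
  return 0;
}

The VEX encoding saves one to two bytes per instruction relative to the four-byte EVEX prefix, which is the whole motivation for the rewrites seen in these tests.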
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll?rev=290869&r1=290868&r2=290869&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll Mon Jan 2 23:46:18 2017
@@ -1693,17 +1693,11 @@ define <32 x i8> @shuffle_v32i8_00_00_00
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
-; AVX2: # BB#0:
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40>
ret <32 x i8> %shuffle
}
@@ -1774,19 +1768,12 @@ define <32 x i8> @shuffle_v32i8_00_32_01
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
-; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
-; AVX512VL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm2, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; AVX2OR512VL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47>
ret <32 x i8> %shuffle
}
@@ -2175,7 +2162,7 @@ define <32 x i8> @shuffle_v32i8_uu_uu_uu
; AVX512VL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,2,2,4,5,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>
ret <32 x i8> %shuffle
@@ -2190,17 +2177,11 @@ define <32 x i8> @shuffle_v32i8_16_16_16
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm0
-; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX2OR512VL-NEXT: vpbroadcastb %xmm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <32 x i8> %shuffle
}
@@ -2264,7 +2245,7 @@ define <32 x i8> @shuffle_v32i8_22_22_22
;
; AVX512VL-LABEL: shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,6,6,6]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll?rev=290869&r1=290868&r2=290869&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll Mon Jan 2 23:46:18 2017
@@ -320,39 +320,19 @@ define <4 x double> @shuffle_v4f64_4163(
}
define <4 x double> @shuffle_v4f64_0145(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_0145:
-; AVX1: # BB#0:
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v4f64_0145:
-; AVX2: # BB#0:
-; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v4f64_0145:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v4f64_0145:
+; ALL: # BB#0:
+; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x double> %shuffle
}
define <4 x double> @shuffle_v4f64_4501(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_4501:
-; AVX1: # BB#0:
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v4f64_4501:
-; AVX2: # BB#0:
-; AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v4f64_4501:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm0, %ymm1, %ymm0
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v4f64_4501:
+; ALL: # BB#0:
+; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; ALL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
ret <4 x double> %shuffle
}
@@ -367,23 +347,11 @@ define <4 x double> @shuffle_v4f64_0167(
}
define <4 x double> @shuffle_v4f64_1054(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_1054:
-; AVX1: # BB#0:
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v4f64_1054:
-; AVX2: # BB#0:
-; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v4f64_1054:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v4f64_1054:
+; ALL: # BB#0:
+; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; ALL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
ret <4 x double> %shuffle
}
@@ -735,7 +703,7 @@ define <4 x i64> @shuffle_v4i64_0142(<4
;
; AVX512VL-LABEL: shuffle_v4i64_0142:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm1
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,2]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX512VL-NEXT: retq
@@ -808,7 +776,7 @@ define <4 x i64> @shuffle_v4i64_0145(<4
;
; AVX512VL-LABEL: shuffle_v4i64_0145:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x i64> %shuffle
@@ -852,7 +820,7 @@ define <4 x i64> @shuffle_v4i64_4501(<4
;
; AVX512VL-LABEL: shuffle_v4i64_4501:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm1, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
ret <4 x i64> %shuffle
@@ -948,7 +916,7 @@ define <4 x i64> @shuffle_v4i64_1054(<4
;
; AVX512VL-LABEL: shuffle_v4i64_1054:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
@@ -1424,7 +1392,7 @@ define <4 x i64> @concat_v4i64_0145_bc(<
;
; AVX512VL-LABEL: concat_v4i64_0145_bc:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
%a0lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 0, i32 1>
%a1lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 4, i32 5>
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll?rev=290869&r1=290868&r2=290869&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll Mon Jan 2 23:46:18 2017
@@ -753,17 +753,11 @@ define <8 x float> @shuffle_v8f32_765432
}
define <8 x float> @shuffle_v8f32_3210ba98(<8 x float> %a, <8 x float> %b) {
-; AVX1OR2-LABEL: shuffle_v8f32_3210ba98:
-; AVX1OR2: # BB#0:
-; AVX1OR2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1OR2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8f32_3210ba98:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8f32_3210ba98:
+; ALL: # BB#0:
+; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
ret <8 x float> %shuffle
}
@@ -829,17 +823,11 @@ define <8 x float> @shuffle_v8f32_ba9876
}
define <8 x float> @shuffle_v8f32_ba983210(<8 x float> %a, <8 x float> %b) {
-; AVX1OR2-LABEL: shuffle_v8f32_ba983210:
-; AVX1OR2: # BB#0:
-; AVX1OR2-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1OR2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8f32_ba983210:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vinsertf64x2 $1, %xmm0, %ymm1, %ymm0
-; AVX512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8f32_ba983210:
+; ALL: # BB#0:
+; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
ret <8 x float> %shuffle
}
@@ -863,17 +851,11 @@ define <8 x float> @shuffle_v8f32_a2u3e6
}
define <8 x float> @shuffle_v8f32_uuuu1111(<8 x float> %a, <8 x float> %b) {
-; AVX1OR2-LABEL: shuffle_v8f32_uuuu1111:
-; AVX1OR2: # BB#0:
-; AVX1OR2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; AVX1OR2-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8f32_uuuu1111:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8f32_uuuu1111:
+; ALL: # BB#0:
+; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1>
ret <8 x float> %shuffle
}
@@ -885,17 +867,11 @@ define <8 x float> @shuffle_v8f32_444444
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v8f32_44444444:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8f32_44444444:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm0
-; AVX512VL-NEXT: vbroadcastss %xmm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v8f32_44444444:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX2OR512VL-NEXT: vbroadcastss %xmm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
ret <8 x float> %shuffle
}
@@ -910,33 +886,21 @@ define <8 x float> @shuffle_v8f32_1188uu
}
define <8 x float> @shuffle_v8f32_uuuu3210(<8 x float> %a, <8 x float> %b) {
-; AVX1OR2-LABEL: shuffle_v8f32_uuuu3210:
-; AVX1OR2: # BB#0:
-; AVX1OR2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
-; AVX1OR2-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8f32_uuuu3210:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8f32_uuuu3210:
+; ALL: # BB#0:
+; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 2, i32 1, i32 0>
ret <8 x float> %shuffle
}
define <8 x float> @shuffle_v8f32_uuuu1188(<8 x float> %a, <8 x float> %b) {
-; AVX1OR2-LABEL: shuffle_v8f32_uuuu1188:
-; AVX1OR2: # BB#0:
-; AVX1OR2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[0,0]
-; AVX1OR2-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8f32_uuuu1188:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[0,0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8f32_uuuu1188:
+; ALL: # BB#0:
+; ALL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[0,0]
+; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 8, i32 8>
ret <8 x float> %shuffle
}
@@ -951,17 +915,11 @@ define <8 x float> @shuffle_v8f32_1111uu
}
define <8 x float> @shuffle_v8f32_5555uuuu(<8 x float> %a, <8 x float> %b) {
-; AVX1OR2-LABEL: shuffle_v8f32_5555uuuu:
-; AVX1OR2: # BB#0:
-; AVX1OR2-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1OR2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8f32_5555uuuu:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm0
-; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8f32_5555uuuu:
+; ALL: # BB#0:
+; ALL-NEXT: vextractf128 $1, %ymm0, %xmm0
+; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x float> %shuffle
}
@@ -1900,17 +1858,11 @@ define <8 x i32> @shuffle_v8i32_3210ba98
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v8i32_3210ba98:
-; AVX2: # BB#0:
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8i32_3210ba98:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v8i32_3210ba98:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
ret <8 x i32> %shuffle
}
@@ -2047,17 +1999,11 @@ define <8 x i32> @shuffle_v8i32_uuuu1111
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v8i32_uuuu1111:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8i32_uuuu1111:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v8i32_uuuu1111:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1>
ret <8 x i32> %shuffle
}
@@ -2095,7 +2041,7 @@ define <8 x i32> @shuffle_v8i32_44444444
;
; AVX512VL-LABEL: shuffle_v8i32_44444444:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vpbroadcastd %xmm0, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
@@ -2109,17 +2055,11 @@ define <8 x i32> @shuffle_v8i32_5555uuuu
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v8i32_5555uuuu:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8i32_5555uuuu:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm0
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v8i32_5555uuuu:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i32> %shuffle
}
Modified: llvm/trunk/test/CodeGen/X86/vector-trunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-trunc.ll?rev=290869&r1=290868&r2=290869&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-trunc.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-trunc.ll Mon Jan 2 23:46:18 2017
@@ -643,7 +643,7 @@ define void @trunc32i16_32i8(<32 x i16>
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT: vpmovsxwd %ymm1, %zmm1
; AVX512VL-NEXT: vpmovdb %zmm1, %xmm1
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vmovdqu %ymm0, (%rax)
; AVX512VL-NEXT: retq
;
@@ -701,7 +701,7 @@ define <8 x i32> @trunc2x4i64_8i32(<4 x
; AVX512VL: # BB#0: # %entry
; AVX512VL-NEXT: vpmovqd %ymm0, %xmm0
; AVX512VL-NEXT: vpmovqd %ymm1, %xmm1
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc2x4i64_8i32:
@@ -717,7 +717,7 @@ define <8 x i32> @trunc2x4i64_8i32(<4 x
; AVX512BWVL: # BB#0: # %entry
; AVX512BWVL-NEXT: vpmovqd %ymm0, %xmm0
; AVX512BWVL-NEXT: vpmovqd %ymm1, %xmm1
-; AVX512BWVL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: retq
entry:
%0 = trunc <4 x i64> %a to <4 x i32>
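Note that only the unmasked 128-bit and 256-bit forms appear in the compression table: masked or zeroing variants and anything referencing the EVEX-only registers xmm16–xmm31/ymm16–ymm31 cannot be VEX-encoded, and 512-bit (zmm) forms have no VEX encoding at all. A rough sketch of the operand screen such a pass needs — the register-range checks assume the contiguous numbering of the generated X86 register enums, and the in-tree pass performs an equivalent check:

#include "llvm/CodeGen/MachineInstr.h"
#include "MCTargetDesc/X86MCTargetDesc.h" // X86::XMM16 etc.

// Sketch: an instruction referencing xmm16-31 or ymm16-31 has no VEX
// encoding, so EVEX->VEX compression must be skipped for it.
static bool usesExtendedRegister(const llvm::MachineInstr &MI) {
  for (const llvm::MachineOperand &MO : MI.operands()) {
    if (!MO.isReg())
      continue;
    unsigned Reg = MO.getReg();
    if ((Reg >= llvm::X86::XMM16 && Reg <= llvm::X86::XMM31) ||
        (Reg >= llvm::X86::YMM16 && Reg <= llvm::X86::YMM31))
      return true;
  }
  return false;
}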