[llvm] 14d1e50 - [X86] combineConcatVectorOps - fold a 512-bit splat of a 128-bit subvector to a single X86ISD::SHUF128 node.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 11 04:14:17 PDT 2023
Author: Simon Pilgrim
Date: 2023-08-11T12:14:02+01:00
New Revision: 14d1e502df1613659e82290d7e31179a2ac97234
URL: https://github.com/llvm/llvm-project/commit/14d1e502df1613659e82290d7e31179a2ac97234
DIFF: https://github.com/llvm/llvm-project/commit/14d1e502df1613659e82290d7e31179a2ac97234.diff
LOG: [X86] combineConcatVectorOps - fold a 512-bit splat of a 128-bit subvector to a single X86ISD::SHUF128 node.
Replaces a pair of insert_subvector nodes with a single (implicitly widened) shuffle, and also reduces the number of uses of the source vector.
Hopefully this addresses most of the remaining subvector-widening regressions I'm seeing while trying to convert TRUNCATE to PACKSS/PACKUS more aggressively.
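As a concrete illustration, here is a reduced reproducer in the style of the updated tests below (the function name is hypothetical, not from this commit):

define <8 x i64> @splat_2i64_8i64(<2 x i64> %a) {
  ; Broadcast the 128-bit input to all four 128-bit lanes of a 512-bit result.
  %r = shufflevector <2 x i64> %a, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x i64> %r
}

On AVX512 targets this now lowers to a single "vshufi64x2 $0, %zmm0, %zmm0, %zmm0" instead of a vinsertf128 + vinsertf64x4 pair - see shuffle_v2i64_v8i64_01010101 in vector-shuffle-512-v8.ll below.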
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll
llvm/test/CodeGen/X86/pr36199.ll
llvm/test/CodeGen/X86/subvector-broadcast.ll
llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index dbe4477a7d22bf..91a0b2c6b5af1c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -54847,6 +54847,17 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
return getConstVector(EltBits, UndefElts, VT, DAG, DL);
}
+ // If we're splatting a 128-bit subvector to 512-bits, use SHUF128 directly.
+ if (IsSplat && NumOps == 4 && VT.is512BitVector() &&
+ Subtarget.useAVX512Regs()) {
+ MVT ShuffleVT = VT.isFloatingPoint() ? MVT::v8f64 : MVT::v8i64;
+ SDValue Res = widenSubVector(Op0, false, Subtarget, DAG, DL, 512);
+ Res = DAG.getBitcast(ShuffleVT, Res);
+ Res = DAG.getNode(X86ISD::SHUF128, DL, ShuffleVT, Res, Res,
+ getV4X86ShuffleImm8ForMask({0, 0, 0, 0}, DL, DAG));
+ return DAG.getBitcast(VT, Res);
+ }
+
return SDValue();
}
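The fold widens the splatted 128-bit source to 512 bits and shuffles it with itself; the {0, 0, 0, 0} mask packs into an imm8 of 0 (two bits per destination lane, each selecting 128-bit lane 0), which is the $0 immediate on the vshuff64x2/vshufi64x2 instructions in the test updates below. The masked intrinsic tests also show the predicate folding onto the new shuffle; a reduced IR equivalent of that pattern (function name hypothetical) would be:

define <8 x double> @mask_splat_2f64_8f64(<2 x double> %a0, <8 x double> %passthru, i8 %mask) {
  ; Splat the 128-bit subvector across all four 128-bit lanes.
  %splat = shufflevector <2 x double> %a0, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %m = bitcast i8 %mask to <8 x i1>
  ; The select becomes the {%k1} predicate on the single vshuff64x2.
  %res = select <8 x i1> %m, <8 x double> %splat, <8 x double> %passthru
  ret <8 x double> %res
}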
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
index 5a167600499b1e..a5a4bf1e53631e 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
@@ -6451,9 +6451,9 @@ declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float>, <16 x
define <16 x float>@test_int_x86_avx512_mask_broadcastf32x4_512(<4 x float> %x0, <16 x float> %x2, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512:
; X86: ## %bb.0:
-; X86-NEXT: ## kill: def $xmm0 killed $xmm0 def $ymm0
-; X86-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
-; X86-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xc0,0x01]
+; X86-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
+; X86-NEXT: vshuff64x2 $0, %zmm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x23,0xc0,0x00]
+; X86-NEXT: ## zmm0 = zmm0[0,1,0,1,0,1,0,1]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmovaps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x28,0xc8]
; X86-NEXT: vmovaps %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0xd0]
@@ -6463,9 +6463,9 @@ define <16 x float>@test_int_x86_avx512_mask_broadcastf32x4_512(<4 x float> %x0,
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512:
; X64: ## %bb.0:
-; X64-NEXT: ## kill: def $xmm0 killed $xmm0 def $ymm0
-; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
-; X64-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xc0,0x01]
+; X64-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
+; X64-NEXT: vshuff64x2 $0, %zmm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x23,0xc0,0x00]
+; X64-NEXT: ## zmm0 = zmm0[0,1,0,1,0,1,0,1]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovaps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x28,0xc8]
; X64-NEXT: vmovaps %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0xd0]
@@ -6583,9 +6583,9 @@ declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32>, <16 x i32
define { <16 x i32>, <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_broadcasti32x4_512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512:
; X86: ## %bb.0:
-; X86-NEXT: ## kill: def $xmm0 killed $xmm0 def $ymm0
-; X86-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x38,0xc0,0x01]
-; X86-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xc0,0x01]
+; X86-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
+; X86-NEXT: vshufi64x2 $0, %zmm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x43,0xc0,0x00]
+; X86-NEXT: ## zmm0 = zmm0[0,1,0,1,0,1,0,1]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0xc8]
; X86-NEXT: vmovdqa32 %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xd0]
@@ -6593,9 +6593,9 @@ define { <16 x i32>, <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_broadcas
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512:
; X64: ## %bb.0:
-; X64-NEXT: ## kill: def $xmm0 killed $xmm0 def $ymm0
-; X64-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x38,0xc0,0x01]
-; X64-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xc0,0x01]
+; X64-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
+; X64-NEXT: vshufi64x2 $0, %zmm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x43,0xc0,0x00]
+; X64-NEXT: ## zmm0 = zmm0[0,1,0,1,0,1,0,1]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0xc8]
; X64-NEXT: vmovdqa32 %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xd0]
diff --git a/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll
index 205173a795b351..23e929aa9d89b1 100644
--- a/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll
@@ -393,9 +393,9 @@ declare <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double>, <8 x
define <8 x double>@test_int_x86_avx512_broadcastf64x2_512(<2 x double> %x0, <8 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_broadcastf64x2_512:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
-; CHECK-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xc0,0x01]
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; CHECK-NEXT: vshuff64x2 $0, %zmm0, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x23,0xc0,0x00]
+; CHECK-NEXT: # zmm0 = zmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double> %x0, <8 x double> %x2, i8 -1)
@@ -405,19 +405,19 @@ define <8 x double>@test_int_x86_avx512_broadcastf64x2_512(<2 x double> %x0, <8
define <8 x double>@test_int_x86_avx512_mask_broadcastf64x2_512(<2 x double> %x0, <8 x double> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf64x2_512:
; X86: # %bb.0:
-; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; X86-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
+; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x1a,0xc8,0x01]
+; X86-NEXT: vshuff64x2 $0, %zmm0, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x23,0xc8,0x00]
+; X86-NEXT: # zmm1 {%k1} = zmm0[0,1,0,1,0,1,0,1]
; X86-NEXT: vmovapd %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf64x2_512:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
+; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x1a,0xc8,0x01]
+; X64-NEXT: vshuff64x2 $0, %zmm0, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x23,0xc8,0x00]
+; X64-NEXT: # zmm1 {%k1} = zmm0[0,1,0,1,0,1,0,1]
; X64-NEXT: vmovapd %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
@@ -428,18 +428,18 @@ define <8 x double>@test_int_x86_avx512_mask_broadcastf64x2_512(<2 x double> %x0
define <8 x double>@test_int_x86_avx512_maskz_broadcastf64x2_512(<2 x double> %x0, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_broadcastf64x2_512:
; X86: # %bb.0:
-; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; X86-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
+; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0x1a,0xc0,0x01]
+; X86-NEXT: vshuff64x2 $0, %zmm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0x23,0xc0,0x00]
+; X86-NEXT: # zmm0 {%k1} {z} = zmm0[0,1,0,1,0,1,0,1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_broadcastf64x2_512:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
+; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0x1a,0xc0,0x01]
+; X64-NEXT: vshuff64x2 $0, %zmm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0x23,0xc0,0x00]
+; X64-NEXT: # zmm0 {%k1} {z} = zmm0[0,1,0,1,0,1,0,1]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double> %x0, <8 x double> zeroinitializer, i8 %mask)
@@ -546,9 +546,9 @@ declare <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64>, <8 x i64>,
define <8 x i64>@test_int_x86_avx512_broadcasti64x2_512(<2 x i64> %x0, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_broadcasti64x2_512:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
-; CHECK-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xc0,0x01]
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; CHECK-NEXT: vshufi64x2 $0, %zmm0, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x43,0xc0,0x00]
+; CHECK-NEXT: # zmm0 = zmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64> %x0, <8 x i64> %x2, i8 -1)
@@ -558,19 +558,19 @@ define <8 x i64>@test_int_x86_avx512_broadcasti64x2_512(<2 x i64> %x0, <8 x i64>
define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x2_512(<2 x i64> %x0, <8 x i64> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti64x2_512:
; X86: # %bb.0:
-; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; X86-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x38,0xc0,0x01]
+; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3a,0xc8,0x01]
+; X86-NEXT: vshufi64x2 $0, %zmm0, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x43,0xc8,0x00]
+; X86-NEXT: # zmm1 {%k1} = zmm0[0,1,0,1,0,1,0,1]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti64x2_512:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; X64-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x38,0xc0,0x01]
+; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3a,0xc8,0x01]
+; X64-NEXT: vshufi64x2 $0, %zmm0, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x43,0xc8,0x00]
+; X64-NEXT: # zmm1 {%k1} = zmm0[0,1,0,1,0,1,0,1]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
@@ -581,18 +581,18 @@ define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x2_512(<2 x i64> %x0, <8 x
define <8 x i64>@test_int_x86_avx512_maskz_broadcasti64x2_512(<2 x i64> %x0, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_broadcasti64x2_512:
; X86: # %bb.0:
-; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; X86-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x38,0xc0,0x01]
+; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0x3a,0xc0,0x01]
+; X86-NEXT: vshufi64x2 $0, %zmm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0x43,0xc0,0x00]
+; X86-NEXT: # zmm0 {%k1} {z} = zmm0[0,1,0,1,0,1,0,1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_broadcasti64x2_512:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; X64-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x38,0xc0,0x01]
+; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0x3a,0xc0,0x01]
+; X64-NEXT: vshufi64x2 $0, %zmm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0x43,0xc0,0x00]
+; X64-NEXT: # zmm0 {%k1} {z} = zmm0[0,1,0,1,0,1,0,1]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64> %x0, <8 x i64> zeroinitializer, i8 %mask)
diff --git a/llvm/test/CodeGen/X86/pr36199.ll b/llvm/test/CodeGen/X86/pr36199.ll
index 8325ccdb369afe..ec18124faad78e 100644
--- a/llvm/test/CodeGen/X86/pr36199.ll
+++ b/llvm/test/CodeGen/X86/pr36199.ll
@@ -5,9 +5,8 @@ define void @foo(<16 x float> %x) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: vaddps %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; CHECK-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
-; CHECK-NEXT: vmovups %zmm0, (%rax)
+; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
+; CHECK-NEXT: vmovupd %zmm0, (%rax)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%1 = fadd <16 x float> %x, %x
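Note the vmovups -> vmovupd switch above: the combine builds the SHUF128 node on v8f64 (bitcasting the <16 x float> value), which is presumably why the following store is now selected in the pd domain. A hedged, reduced form of the pr36199 pattern (the function name and explicit store pointer are illustrative):

define void @add_splat_store(<16 x float> %x, ptr %p) {
  ; Only the low four lanes feed the splat, so the add narrows to xmm.
  %a = fadd <16 x float> %x, %x
  ; Splat the low 128 bits across a 512-bit vector and store it unaligned.
  %s = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  store <16 x float> %s, ptr %p, align 1
  ret void
}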
diff --git a/llvm/test/CodeGen/X86/subvector-broadcast.ll b/llvm/test/CodeGen/X86/subvector-broadcast.ll
index 5b6d9cd463c71c..b6a245a98634ff 100644
--- a/llvm/test/CodeGen/X86/subvector-broadcast.ll
+++ b/llvm/test/CodeGen/X86/subvector-broadcast.ll
@@ -1153,9 +1153,8 @@ define <8 x double> @reg_broadcast_2f64_8f64(<2 x double> %a0) nounwind {
;
; X86-AVX512-LABEL: reg_broadcast_2f64_8f64:
; X86-AVX512: # %bb.0:
-; X86-AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; X86-AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X86-AVX512-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
+; X86-AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; X86-AVX512-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
; X86-AVX512-NEXT: retl
;
; X64-AVX-LABEL: reg_broadcast_2f64_8f64:
@@ -1167,9 +1166,8 @@ define <8 x double> @reg_broadcast_2f64_8f64(<2 x double> %a0) nounwind {
;
; X64-AVX512-LABEL: reg_broadcast_2f64_8f64:
; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; X64-AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X64-AVX512-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
+; X64-AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; X64-AVX512-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
; X64-AVX512-NEXT: retq
%1 = shufflevector <2 x double> %a0, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
ret <8 x double> %1
@@ -1227,9 +1225,8 @@ define <8 x i64> @reg_broadcast_2i64_8i64(<2 x i64> %a0) nounwind {
;
; X86-AVX512-LABEL: reg_broadcast_2i64_8i64:
; X86-AVX512: # %bb.0:
-; X86-AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; X86-AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X86-AVX512-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
+; X86-AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; X86-AVX512-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
; X86-AVX512-NEXT: retl
;
; X64-AVX-LABEL: reg_broadcast_2i64_8i64:
@@ -1241,9 +1238,8 @@ define <8 x i64> @reg_broadcast_2i64_8i64(<2 x i64> %a0) nounwind {
;
; X64-AVX512-LABEL: reg_broadcast_2i64_8i64:
; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; X64-AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X64-AVX512-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
+; X64-AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; X64-AVX512-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
; X64-AVX512-NEXT: retq
%1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
ret <8 x i64> %1
@@ -1301,9 +1297,8 @@ define <16 x float> @reg_broadcast_4f32_16f32(<4 x float> %a0) nounwind {
;
; X86-AVX512-LABEL: reg_broadcast_4f32_16f32:
; X86-AVX512: # %bb.0:
-; X86-AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; X86-AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X86-AVX512-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
+; X86-AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; X86-AVX512-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
; X86-AVX512-NEXT: retl
;
; X64-AVX-LABEL: reg_broadcast_4f32_16f32:
@@ -1315,9 +1310,8 @@ define <16 x float> @reg_broadcast_4f32_16f32(<4 x float> %a0) nounwind {
;
; X64-AVX512-LABEL: reg_broadcast_4f32_16f32:
; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; X64-AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X64-AVX512-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
+; X64-AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; X64-AVX512-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
; X64-AVX512-NEXT: retq
%1 = shufflevector <4 x float> %a0, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
ret <16 x float> %1
@@ -1375,9 +1369,8 @@ define <16 x i32> @reg_broadcast_4i32_16i32(<4 x i32> %a0) nounwind {
;
; X86-AVX512-LABEL: reg_broadcast_4i32_16i32:
; X86-AVX512: # %bb.0:
-; X86-AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; X86-AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X86-AVX512-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
+; X86-AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; X86-AVX512-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
; X86-AVX512-NEXT: retl
;
; X64-AVX-LABEL: reg_broadcast_4i32_16i32:
@@ -1389,9 +1382,8 @@ define <16 x i32> @reg_broadcast_4i32_16i32(<4 x i32> %a0) nounwind {
;
; X64-AVX512-LABEL: reg_broadcast_4i32_16i32:
; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; X64-AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X64-AVX512-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
+; X64-AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; X64-AVX512-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
; X64-AVX512-NEXT: retq
%1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
ret <16 x i32> %1
@@ -1449,9 +1441,8 @@ define <32 x i16> @reg_broadcast_8i16_32i16(<8 x i16> %a0) nounwind {
;
; X86-AVX512-LABEL: reg_broadcast_8i16_32i16:
; X86-AVX512: # %bb.0:
-; X86-AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; X86-AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X86-AVX512-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
+; X86-AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; X86-AVX512-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
; X86-AVX512-NEXT: retl
;
; X64-AVX-LABEL: reg_broadcast_8i16_32i16:
@@ -1463,9 +1454,8 @@ define <32 x i16> @reg_broadcast_8i16_32i16(<8 x i16> %a0) nounwind {
;
; X64-AVX512-LABEL: reg_broadcast_8i16_32i16:
; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; X64-AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X64-AVX512-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
+; X64-AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; X64-AVX512-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
; X64-AVX512-NEXT: retq
%1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <32 x i16> %1
@@ -1523,9 +1513,8 @@ define <64 x i8> @reg_broadcast_16i8_64i8(<16 x i8> %a0) nounwind {
;
; X86-AVX512-LABEL: reg_broadcast_16i8_64i8:
; X86-AVX512: # %bb.0:
-; X86-AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; X86-AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X86-AVX512-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
+; X86-AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; X86-AVX512-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
; X86-AVX512-NEXT: retl
;
; X64-AVX-LABEL: reg_broadcast_16i8_64i8:
@@ -1537,9 +1526,8 @@ define <64 x i8> @reg_broadcast_16i8_64i8(<16 x i8> %a0) nounwind {
;
; X64-AVX512-LABEL: reg_broadcast_16i8_64i8:
; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; X64-AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X64-AVX512-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
+; X64-AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; X64-AVX512-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
; X64-AVX512-NEXT: retq
%1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <64 x i8> %1
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
index 6fd61d2a5dfa50..370a69696d850d 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
@@ -832,9 +832,8 @@ define <16 x i32> @mask_shuffle_v16i32_00_01_02_03_16_17_18_19_08_09_10_11_12_13
define <16 x i32> @mask_shuffle_v4i32_v16i32_00_01_02_03_00_01_02_03_00_01_02_03_00_01_02_03(<4 x i32> %a) {
; ALL-LABEL: mask_shuffle_v4i32_v16i32_00_01_02_03_00_01_02_03_00_01_02_03_00_01_02_03:
; ALL: # %bb.0:
-; ALL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
+; ALL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; ALL-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
; ALL-NEXT: retq
%res = shufflevector <4 x i32> %a, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
ret <16 x i32> %res
@@ -843,9 +842,8 @@ define <16 x i32> @mask_shuffle_v4i32_v16i32_00_01_02_03_00_01_02_03_00_01_02_03
define <16 x float> @mask_shuffle_v4f32_v16f32_00_01_02_03_00_01_02_03_00_01_02_03_00_01_02_03(<4 x float> %a) {
; ALL-LABEL: mask_shuffle_v4f32_v16f32_00_01_02_03_00_01_02_03_00_01_02_03_00_01_02_03:
; ALL: # %bb.0:
-; ALL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
+; ALL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; ALL-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
; ALL-NEXT: retq
%res = shufflevector <4 x float> %a, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
ret <16 x float> %res
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
index b54073a5de7128..96ba108716fa2c 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
@@ -2164,9 +2164,8 @@ define <8 x double> @shuffle_v4f64_v8f64_22222222(<4 x double> %a) {
define <8 x i64> @shuffle_v2i64_v8i64_01010101(<2 x i64> %a) {
; ALL-LABEL: shuffle_v2i64_v8i64_01010101:
; ALL: # %bb.0:
-; ALL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
+; ALL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; ALL-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
; ALL-NEXT: ret{{[l|q]}}
%shuffle = shufflevector <2 x i64> %a, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
ret <8 x i64> %shuffle
@@ -2175,9 +2174,8 @@ define <8 x i64> @shuffle_v2i64_v8i64_01010101(<2 x i64> %a) {
define <8 x double> @shuffle_v2f64_v8f64_01010101(<2 x double> %a) {
; ALL-LABEL: shuffle_v2f64_v8f64_01010101:
; ALL: # %bb.0:
-; ALL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
+; ALL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; ALL-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
; ALL-NEXT: ret{{[l|q]}}
%shuffle = shufflevector <2 x double> %a, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
ret <8 x double> %shuffle