[llvm] [X86][AVX] Prefer per-element vector shifts for known splats #39424 (PR #87913)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 22 02:15:51 PDT 2024
https://github.com/SahilPatidar updated https://github.com/llvm/llvm-project/pull/87913
>From e519e1f4a726f1efaac38327d9d1e3420116334b Mon Sep 17 00:00:00 2001
From: SahilPatidar <patidarsahil2001 at gmail.com>
Date: Sat, 6 Apr 2024 15:30:53 +0530
Subject: [PATCH 1/3] [X86][AVX] Prefer VPSRAV to VPSRA style shifts for known
splats #39424
---
llvm/lib/Target/X86/X86.td | 7 +-
llvm/lib/Target/X86/X86ISelLowering.cpp | 6 +-
llvm/test/CodeGen/X86/avx2-vector-shifts.ll | 4 +-
llvm/test/CodeGen/X86/vector-fshl-128.ll | 102 ++++---
llvm/test/CodeGen/X86/vector-fshl-256.ll | 89 +++---
llvm/test/CodeGen/X86/vector-fshl-512.ll | 66 +++--
llvm/test/CodeGen/X86/vector-fshl-rot-128.ll | 59 ++--
llvm/test/CodeGen/X86/vector-fshl-rot-256.ll | 40 +--
llvm/test/CodeGen/X86/vector-fshl-rot-512.ll | 22 +-
llvm/test/CodeGen/X86/vector-fshr-128.ll | 100 ++++---
llvm/test/CodeGen/X86/vector-fshr-256.ll | 89 +++---
llvm/test/CodeGen/X86/vector-fshr-512.ll | 66 +++--
llvm/test/CodeGen/X86/vector-fshr-rot-128.ll | 51 ++--
llvm/test/CodeGen/X86/vector-fshr-rot-256.ll | 36 ++-
llvm/test/CodeGen/X86/vector-fshr-rot-512.ll | 22 +-
llvm/test/CodeGen/X86/vector-rotate-128.ll | 50 ++--
llvm/test/CodeGen/X86/vector-rotate-256.ll | 38 +--
llvm/test/CodeGen/X86/vector-rotate-512.ll | 22 +-
.../test/CodeGen/X86/vector-shift-ashr-128.ll | 235 ++++++++++-----
.../test/CodeGen/X86/vector-shift-ashr-256.ll | 222 +++++++++-----
.../test/CodeGen/X86/vector-shift-ashr-512.ll | 30 +-
.../CodeGen/X86/vector-shift-ashr-sub128.ll | 170 +++++++----
.../test/CodeGen/X86/vector-shift-lshr-128.ll | 273 ++++++++++++------
.../test/CodeGen/X86/vector-shift-lshr-256.ll | 182 ++++++++----
.../test/CodeGen/X86/vector-shift-lshr-512.ll | 26 +-
.../CodeGen/X86/vector-shift-lshr-sub128.ll | 170 +++++++----
llvm/test/CodeGen/X86/vector-shift-shl-128.ll | 273 ++++++++++++------
llvm/test/CodeGen/X86/vector-shift-shl-256.ll | 182 ++++++++----
llvm/test/CodeGen/X86/vector-shift-shl-512.ll | 26 +-
.../CodeGen/X86/vector-shift-shl-sub128.ll | 170 +++++++----
llvm/test/CodeGen/X86/vselect-avx.ll | 16 +-
31 files changed, 1826 insertions(+), 1018 deletions(-)
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 78bc043911f2fc7..a9f3ae1f847552e 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -599,6 +599,10 @@ def TuningPreferShiftShuffle : SubtargetFeature<"faster-shift-than-shuffle",
"PreferLowerShuffleAsShift", "true",
"Shifts are faster (or as fast) as shuffle">;
+def TuningPreferPerEltVectorShift : SubtargetFeature<"tuning-fast-per-element-vector-shift", // Tuning feature string; the LowerShift hunk of this patch queries Subtarget.preferPerEltVectorShift() (presumably the accessor for the field named on the next line - TODO confirm).
+ "PreferPerEltVectorShift", "true",
+ "Vector per element shifts are faster (1/cycle latency)">; // NOTE(review): "1/cycle" reads as a throughput, not a latency, and TuningFastImmVectorShift's description uses "1/cycle" for the slow case - confirm the intended wording.
+
def TuningFastImmVectorShift : SubtargetFeature<"tuning-fast-imm-vector-shift",
"FastImmVectorShift", "true",
"Vector shifts are fast (2/cycle) as opposed to slow (1/cycle)">;
@@ -996,7 +1000,8 @@ def ProcessorFeatures {
TuningNoDomainDelayMov,
TuningNoDomainDelayShuffle,
TuningNoDomainDelayBlend,
- TuningFastImmVectorShift];
+ TuningFastImmVectorShift,
+ TuningPreferPerEltVectorShift];
list<SubtargetFeature> SKXFeatures =
!listconcat(BDWFeatures, SKXAdditionalFeatures);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index bedec0c8974a857..b071396fef01d6a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -29346,8 +29346,10 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
if (SDValue V = LowerShiftByScalarImmediate(Op, DAG, Subtarget))
return V;
- if (SDValue V = LowerShiftByScalarVariable(Op, DAG, Subtarget))
- return V;
+ if (!supportedVectorVarShift(VT, Subtarget, Opc) && // Gate for LowerShiftByScalarVariable: it is attempted only when BOTH tests on these two lines are false.
+ !Subtarget.preferPerEltVectorShift()) // NOTE(review): this is !(supported || prefer), so the tuning flag never changes the outcome when the var shift is supported - was !(supported && prefer) intended? TODO confirm; changing it requires regenerating this patch's CHECK lines.
+ if (SDValue V = LowerShiftByScalarVariable(Op, DAG, Subtarget)) // Use the helper's result only when it yields a value; otherwise fall through to the checks that follow.
+ return V;
if (supportedVectorVarShift(VT, Subtarget, Opc))
return Op;
diff --git a/llvm/test/CodeGen/X86/avx2-vector-shifts.ll b/llvm/test/CodeGen/X86/avx2-vector-shifts.ll
index 983c69d1a1c2e87..6ad8106bba6d4fc 100644
--- a/llvm/test/CodeGen/X86/avx2-vector-shifts.ll
+++ b/llvm/test/CodeGen/X86/avx2-vector-shifts.ll
@@ -61,14 +61,14 @@ define <8 x i32> @test_vpslld_var(i32 %shift) {
; X86: # %bb.0:
; X86-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: vpmovzxbd {{.*#+}} ymm1 = [192,193,194,195,196,197,198,199]
-; X86-NEXT: vpslld %xmm0, %ymm1, %ymm0
+; X86-NEXT: vpsllvd %ymm0, %ymm1, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: test_vpslld_var:
; X64: # %bb.0:
; X64-NEXT: vmovd %edi, %xmm0
; X64-NEXT: vpmovzxbd {{.*#+}} ymm1 = [192,193,194,195,196,197,198,199]
-; X64-NEXT: vpslld %xmm0, %ymm1, %ymm0
+; X64-NEXT: vpsllvd %ymm0, %ymm1, %ymm0
; X64-NEXT: retq
%amt = insertelement <8 x i32> undef, i32 %shift, i32 0
%tmp = shl <8 x i32> <i32 192, i32 193, i32 194, i32 195, i32 196, i32 197, i32 198, i32 199>, %amt
diff --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll
index 1addedf3c3d9601..577a86dff54e96b 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll
@@ -992,47 +992,62 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: splatvar_funnnel_v2i64:
-; AVX: # %bb.0:
-; AVX-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; AVX-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1
-; AVX-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
-; AVX-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX-NEXT: vpsllq %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_funnnel_v2i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
+; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX1-NEXT: vpsrlq $1, %xmm1, %xmm1
+; AVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatvar_funnnel_v2i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq %xmm2, %xmm2
+; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
+; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX2-NEXT: vpsrlq $1, %xmm1, %xmm1
+; AVX2-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
+; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v2i64:
; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX512F-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512F-NEXT: vpsrlq $1, %xmm1, %xmm1
-; AVX512F-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
+; AVX512F-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512F-NEXT: vpsllq %xmm2, %xmm0, %xmm0
+; AVX512F-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v2i64:
; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VL-NEXT: vpsrlq $1, %xmm1, %xmm1
-; AVX512VL-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
+; AVX512VL-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VL-NEXT: vpsllq %xmm2, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v2i64:
; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512BW-NEXT: vpsrlq $1, %xmm1, %xmm1
-; AVX512BW-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
+; AVX512BW-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT: vpsllq %xmm2, %xmm0, %xmm0
+; AVX512BW-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
@@ -1048,12 +1063,13 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v2i64:
; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VLBW-NEXT: vpsrlq $1, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VLBW-NEXT: vpsllq %xmm2, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
@@ -1063,16 +1079,28 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %
; AVX512VLVBMI2-NEXT: vpshldvq %xmm2, %xmm1, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
-; XOP-LABEL: splatvar_funnnel_v2i64:
-; XOP: # %bb.0:
-; XOP-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; XOP-NEXT: vpsrlq $1, %xmm1, %xmm1
-; XOP-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
-; XOP-NEXT: vpand %xmm3, %xmm2, %xmm2
-; XOP-NEXT: vpsllq %xmm2, %xmm0, %xmm0
-; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
-; XOP-NEXT: retq
+; XOPAVX1-LABEL: splatvar_funnnel_v2i64:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
+; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; XOPAVX1-NEXT: vpsrlq $1, %xmm1, %xmm1
+; XOPAVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
+; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0
+; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_funnnel_v2i64:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastq %xmm2, %xmm2
+; XOPAVX2-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
+; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; XOPAVX2-NEXT: vpsrlq $1, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
+; XOPAVX2-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
+; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
;
; X86-SSE2-LABEL: splatvar_funnnel_v2i64:
; X86-SSE2: # %bb.0:
@@ -1255,13 +1283,16 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512BW-LABEL: splatvar_funnnel_v8i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm3 = [15,0]
-; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm2, %xmm2
; AVX512BW-NEXT: vpsrlw $1, %xmm1, %xmm1
-; AVX512BW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT: vpsllw %xmm2, %xmm0, %xmm0
+; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v8i16:
@@ -1276,12 +1307,13 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpmovsxbq {{.*#+}} xmm3 = [15,0]
+; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %xmm2
+; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VLBW-NEXT: vpsrlw $1, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpsrlvw %xmm4, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpsllvw %xmm2, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll
index ebcb1cb15a600ed..e11f26e10b0ea69 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll
@@ -778,45 +778,49 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %
;
; AVX2-LABEL: splatvar_funnnel_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX2-NEXT: vpbroadcastq %xmm2, %ymm2
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
+; AVX2-NEXT: vpandn %ymm3, %ymm2, %ymm4
; AVX2-NEXT: vpsrlq $1, %ymm1, %ymm1
-; AVX2-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
-; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX2-NEXT: vpsllq %xmm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
+; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v4i64:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX512F-NEXT: vpbroadcastq %xmm2, %ymm2
+; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
+; AVX512F-NEXT: vpandn %ymm3, %ymm2, %ymm4
; AVX512F-NEXT: vpsrlq $1, %ymm1, %ymm1
-; AVX512F-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
-; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512F-NEXT: vpsllq %xmm2, %ymm0, %ymm0
+; AVX512F-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
+; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX512F-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v4i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
-; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX512VL-NEXT: vpbroadcastq %xmm2, %ymm2
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
+; AVX512VL-NEXT: vpandn %ymm3, %ymm2, %ymm4
; AVX512VL-NEXT: vpsrlq $1, %ymm1, %ymm1
-; AVX512VL-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
-; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VL-NEXT: vpsllq %xmm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
+; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX512VL-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v4i64:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX512BW-NEXT: vpbroadcastq %xmm2, %ymm2
+; AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
+; AVX512BW-NEXT: vpandn %ymm3, %ymm2, %ymm4
; AVX512BW-NEXT: vpsrlq $1, %ymm1, %ymm1
-; AVX512BW-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
-; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT: vpsllq %xmm2, %ymm0, %ymm0
+; AVX512BW-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
+; AVX512BW-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX512BW-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
@@ -831,12 +835,13 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v4i64:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
-; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX512VLBW-NEXT: vpbroadcastq %xmm2, %ymm2
+; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
+; AVX512VLBW-NEXT: vpandn %ymm3, %ymm2, %ymm4
; AVX512VLBW-NEXT: vpsrlq $1, %ymm1, %ymm1
-; AVX512VLBW-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
-; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VLBW-NEXT: vpsllq %xmm2, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX512VLBW-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
@@ -866,12 +871,13 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %
;
; XOPAVX2-LABEL: splatvar_funnnel_v4i64:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; XOPAVX2-NEXT: vpbroadcastq %xmm2, %ymm2
+; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
+; XOPAVX2-NEXT: vpandn %ymm3, %ymm2, %ymm4
; XOPAVX2-NEXT: vpsrlq $1, %ymm1, %ymm1
-; XOPAVX2-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
-; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
-; XOPAVX2-NEXT: vpsllq %xmm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
+; XOPAVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
+; XOPAVX2-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
%splat = shufflevector <4 x i64> %amt, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -1049,12 +1055,14 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX512BW-LABEL: splatvar_funnnel_v16i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
-; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm2, %ymm2
; AVX512BW-NEXT: vpsrlw $1, %ymm1, %ymm1
-; AVX512BW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
-; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm0
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpandn %ymm3, %ymm2, %ymm4
+; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
+; AVX512BW-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
@@ -1069,12 +1077,13 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
-; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %ymm2
+; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VLBW-NEXT: vpandn %ymm3, %ymm2, %ymm4
; AVX512VLBW-NEXT: vpsrlw $1, %ymm1, %ymm1
-; AVX512VLBW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
-; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpsrlvw %ymm4, %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX512VLBW-NEXT: vpsllvw %ymm2, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshl-512.ll b/llvm/test/CodeGen/X86/vector-fshl-512.ll
index e23855361e57a23..fe8af191d0ff406 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-512.ll
@@ -426,34 +426,37 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt)
define <8 x i64> @splatvar_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v8i64:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX512F-NEXT: vpbroadcastq %xmm2, %zmm2
+; AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
+; AVX512F-NEXT: vpandnq %zmm3, %zmm2, %zmm4
; AVX512F-NEXT: vpsrlq $1, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq %xmm4, %zmm1, %zmm1
-; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512F-NEXT: vpsllq %xmm2, %zmm0, %zmm0
+; AVX512F-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1
+; AVX512F-NEXT: vpandq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsllvq %zmm2, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v8i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
-; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX512VL-NEXT: vpbroadcastq %xmm2, %zmm2
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
+; AVX512VL-NEXT: vpandnq %zmm3, %zmm2, %zmm4
; AVX512VL-NEXT: vpsrlq $1, %zmm1, %zmm1
-; AVX512VL-NEXT: vpsrlq %xmm4, %zmm1, %zmm1
-; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VL-NEXT: vpsllq %xmm2, %zmm0, %zmm0
+; AVX512VL-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1
+; AVX512VL-NEXT: vpandq %zmm3, %zmm2, %zmm2
+; AVX512VL-NEXT: vpsllvq %zmm2, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v8i64:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX512BW-NEXT: vpbroadcastq %xmm2, %zmm2
+; AVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
+; AVX512BW-NEXT: vpandnq %zmm3, %zmm2, %zmm4
; AVX512BW-NEXT: vpsrlq $1, %zmm1, %zmm1
-; AVX512BW-NEXT: vpsrlq %xmm4, %zmm1, %zmm1
-; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT: vpsllq %xmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1
+; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm2
+; AVX512BW-NEXT: vpsllvq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
@@ -465,12 +468,13 @@ define <8 x i64> @splatvar_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i64:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
-; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX512VLBW-NEXT: vpbroadcastq %xmm2, %zmm2
+; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
+; AVX512VLBW-NEXT: vpandnq %zmm3, %zmm2, %zmm4
; AVX512VLBW-NEXT: vpsrlq $1, %zmm1, %zmm1
-; AVX512VLBW-NEXT: vpsrlq %xmm4, %zmm1, %zmm1
-; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VLBW-NEXT: vpsllq %xmm2, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1
+; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm2
+; AVX512VLBW-NEXT: vpsllvq %zmm2, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
@@ -580,12 +584,13 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
;
; AVX512BW-LABEL: splatvar_funnnel_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
-; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX512BW-NEXT: vpbroadcastw %xmm2, %zmm2
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpandnq %zmm3, %zmm2, %zmm4
; AVX512BW-NEXT: vpsrlw $1, %zmm1, %zmm1
-; AVX512BW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
-; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
+; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm2
+; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
@@ -597,12 +602,13 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
-; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %zmm2
+; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VLBW-NEXT: vpandnq %zmm3, %zmm2, %zmm4
; AVX512VLBW-NEXT: vpsrlw $1, %zmm1, %zmm1
-; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
-; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
+; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm2
+; AVX512VLBW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
index 6b5ba7042c5c51d..73a81614f9a9374 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
@@ -750,17 +750,30 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind
; SSE41-NEXT: por %xmm4, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: splatvar_funnnel_v2i64:
-; AVX: # %bb.0:
-; AVX-NEXT: vpmovsxbq {{.*#+}} xmm2 = [63,63]
-; AVX-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX-NEXT: vpsllq %xmm3, %xmm0, %xmm3
-; AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX-NEXT: vpsubq %xmm1, %xmm4, %xmm1
-; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpor %xmm0, %xmm3, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_funnnel_v2i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm2 = [63,63]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vpsllq %xmm3, %xmm0, %xmm3
+; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: vpsubq %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatvar_funnnel_v2i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm2 = [63,63]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX2-NEXT: vpsllvq %xmm3, %xmm0, %xmm3
+; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX2-NEXT: vpsubq %xmm1, %xmm4, %xmm1
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpor %xmm0, %xmm3, %xmm0
+; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v2i64:
; AVX512F: # %bb.0:
@@ -990,23 +1003,27 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind
;
; AVX512BW-LABEL: splatvar_funnnel_v8i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm2 = [15,0]
-; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
-; AVX512BW-NEXT: vpsrlw $1, %xmm0, %xmm4
-; AVX512BW-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
-; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpor %xmm3, %xmm0, %xmm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpsrlw $1, %xmm0, %xmm2
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpandn %xmm3, %xmm1, %xmm4
+; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm2, %zmm2
+; AVX512BW-NEXT: vpand %xmm3, %xmm1, %xmm1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpor %xmm2, %xmm0, %xmm0
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpmovsxbq {{.*#+}} xmm2 = [15,0]
+; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15]
; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VLBW-NEXT: vpsrlw $1, %xmm0, %xmm4
-; AVX512VLBW-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
+; AVX512VLBW-NEXT: vpsrlvw %xmm3, %xmm4, %xmm3
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpsllvw %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
index 731a9f4e4037306..8c3e27aaba1fb1c 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
@@ -587,13 +587,14 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind
;
; AVX2-LABEL: splatvar_funnnel_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm2 = [63,63]
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX2-NEXT: vpsllq %xmm3, %ymm0, %ymm3
+; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [63,63,63,63]
+; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm3
+; AVX2-NEXT: vpsllvq %ymm3, %ymm0, %ymm3
; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX2-NEXT: vpsubq %xmm1, %xmm4, %xmm1
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsubq %ymm1, %ymm4, %ymm1
+; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm0, %ymm3, %ymm0
; AVX2-NEXT: retq
;
@@ -806,23 +807,26 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounw
;
; AVX512BW-LABEL: splatvar_funnnel_v16i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
-; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
-; AVX512BW-NEXT: vpsrlw $1, %ymm0, %ymm4
-; AVX512BW-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
-; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpor %ymm3, %ymm0, %ymm0
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512BW-NEXT: vpsrlw $1, %ymm0, %ymm2
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpandn %ymm3, %ymm1, %ymm4
+; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm2, %zmm2
+; AVX512BW-NEXT: vpand %ymm3, %ymm1, %ymm1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpor %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
-; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VLBW-NEXT: vpandn %ymm2, %ymm1, %ymm3
; AVX512VLBW-NEXT: vpsrlw $1, %ymm0, %ymm4
-; AVX512VLBW-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
-; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpsrlvw %ymm3, %ymm4, %ymm3
+; AVX512VLBW-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
index edfa56a70d59e92..85a43ea02034d57 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
@@ -334,23 +334,25 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw
;
; AVX512BW-LABEL: splatvar_funnnel_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
-; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %zmm1
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpandnq %zmm2, %zmm1, %zmm3
; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm4
-; AVX512BW-NEXT: vpsrlw %xmm3, %zmm4, %zmm3
-; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlvw %zmm3, %zmm4, %zmm3
+; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
-; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %zmm1
+; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VLBW-NEXT: vpandnq %zmm2, %zmm1, %zmm3
; AVX512VLBW-NEXT: vpsrlw $1, %zmm0, %zmm4
-; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm4, %zmm3
-; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpsrlvw %zmm3, %zmm4, %zmm3
+; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm1
+; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll
index 638a3cdaa2c1d26..56333a69c3e694e 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll
@@ -1113,47 +1113,62 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: splatvar_funnnel_v2i64:
-; AVX: # %bb.0:
-; AVX-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; AVX-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
-; AVX-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX-NEXT: vpaddq %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vpsllq %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_funnnel_v2i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
+; AVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatvar_funnnel_v2i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq %xmm2, %xmm2
+; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
+; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
+; AVX2-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
+; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpaddq %xmm0, %xmm0, %xmm0
+; AVX2-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v2i64:
; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX512F-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512F-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
+; AVX512F-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512F-NEXT: vpaddq %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT: vpsllq %xmm2, %xmm0, %xmm0
+; AVX512F-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v2i64:
; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VL-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
+; AVX512VL-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512VL-NEXT: vpaddq %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT: vpsllq %xmm2, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v2i64:
; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512BW-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
+; AVX512BW-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT: vpaddq %xmm0, %xmm0, %xmm0
-; AVX512BW-NEXT: vpsllq %xmm2, %xmm0, %xmm0
+; AVX512BW-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
@@ -1169,12 +1184,13 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v2i64:
; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VLBW-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpaddq %xmm0, %xmm0, %xmm0
-; AVX512VLBW-NEXT: vpsllq %xmm2, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
@@ -1185,16 +1201,28 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %
; AVX512VLVBMI2-NEXT: vmovdqa %xmm1, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
-; XOP-LABEL: splatvar_funnnel_v2i64:
-; XOP: # %bb.0:
-; XOP-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; XOP-NEXT: vpand %xmm3, %xmm2, %xmm4
-; XOP-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
-; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; XOP-NEXT: vpaddq %xmm0, %xmm0, %xmm0
-; XOP-NEXT: vpsllq %xmm2, %xmm0, %xmm0
-; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
-; XOP-NEXT: retq
+; XOPAVX1-LABEL: splatvar_funnnel_v2i64:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
+; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
+; XOPAVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
+; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0
+; XOPAVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0
+; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_funnnel_v2i64:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastq %xmm2, %xmm2
+; XOPAVX2-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
+; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
+; XOPAVX2-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; XOPAVX2-NEXT: vpaddq %xmm0, %xmm0, %xmm0
+; XOPAVX2-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
+; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
;
; X86-SSE2-LABEL: splatvar_funnnel_v2i64:
; X86-SSE2: # %bb.0:
@@ -1380,13 +1408,16 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512BW-LABEL: splatvar_funnnel_v8i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm3 = [15,0]
+; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512BW-NEXT: vpbroadcastw %xmm2, %xmm2
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512BW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
+; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT: vpaddw %xmm0, %xmm0, %xmm0
-; AVX512BW-NEXT: vpsllw %xmm2, %xmm0, %xmm0
+; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v8i16:
@@ -1401,12 +1432,13 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpmovsxbq {{.*#+}} xmm3 = [15,0]
+; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %xmm2
+; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VLBW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpsrlvw %xmm4, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpaddw %xmm0, %xmm0, %xmm0
-; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpsllvw %xmm2, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll
index 3fabf720da71c3b..20143d1f9fe66d7 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll
@@ -808,45 +808,49 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %
;
; AVX2-LABEL: splatvar_funnnel_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX2-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
-; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpbroadcastq %xmm2, %ymm2
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
+; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm4
+; AVX2-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
+; AVX2-NEXT: vpandn %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpaddq %ymm0, %ymm0, %ymm0
-; AVX2-NEXT: vpsllq %xmm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v4i64:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512F-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
-; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX512F-NEXT: vpbroadcastq %xmm2, %ymm2
+; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
+; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm4
+; AVX512F-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
+; AVX512F-NEXT: vpandn %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpaddq %ymm0, %ymm0, %ymm0
-; AVX512F-NEXT: vpsllq %xmm2, %ymm0, %ymm0
+; AVX512F-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v4i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
-; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VL-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
-; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX512VL-NEXT: vpbroadcastq %xmm2, %ymm2
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
+; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm4
+; AVX512VL-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
+; AVX512VL-NEXT: vpandn %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpaddq %ymm0, %ymm0, %ymm0
-; AVX512VL-NEXT: vpsllq %xmm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v4i64:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512BW-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
-; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX512BW-NEXT: vpbroadcastq %xmm2, %ymm2
+; AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
+; AVX512BW-NEXT: vpand %ymm3, %ymm2, %ymm4
+; AVX512BW-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
+; AVX512BW-NEXT: vpandn %ymm3, %ymm2, %ymm2
; AVX512BW-NEXT: vpaddq %ymm0, %ymm0, %ymm0
-; AVX512BW-NEXT: vpsllq %xmm2, %ymm0, %ymm0
+; AVX512BW-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
@@ -861,12 +865,13 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v4i64:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
-; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VLBW-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
-; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX512VLBW-NEXT: vpbroadcastq %xmm2, %ymm2
+; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
+; AVX512VLBW-NEXT: vpand %ymm3, %ymm2, %ymm4
+; AVX512VLBW-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpandn %ymm3, %ymm2, %ymm2
; AVX512VLBW-NEXT: vpaddq %ymm0, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpsllq %xmm2, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
@@ -897,12 +902,13 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %
;
; XOPAVX2-LABEL: splatvar_funnnel_v4i64:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
-; XOPAVX2-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
-; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; XOPAVX2-NEXT: vpbroadcastq %xmm2, %ymm2
+; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
+; XOPAVX2-NEXT: vpand %ymm3, %ymm2, %ymm4
+; XOPAVX2-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
+; XOPAVX2-NEXT: vpandn %ymm3, %ymm2, %ymm2
; XOPAVX2-NEXT: vpaddq %ymm0, %ymm0, %ymm0
-; XOPAVX2-NEXT: vpsllq %xmm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
%splat = shufflevector <4 x i64> %amt, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -1081,12 +1087,14 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX512BW-LABEL: splatvar_funnnel_v16i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
-; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512BW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
-; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT: vpbroadcastw %xmm2, %ymm2
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpand %ymm3, %ymm2, %ymm4
+; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddw %ymm0, %ymm0, %ymm0
-; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm0
+; AVX512BW-NEXT: vpandn %ymm3, %ymm2, %ymm2
+; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
@@ -1101,12 +1109,13 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
-; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VLBW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
-; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %ymm2
+; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VLBW-NEXT: vpand %ymm3, %ymm2, %ymm4
+; AVX512VLBW-NEXT: vpsrlvw %ymm4, %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpandn %ymm3, %ymm2, %ymm2
; AVX512VLBW-NEXT: vpaddw %ymm0, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpsllvw %ymm2, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshr-512.ll b/llvm/test/CodeGen/X86/vector-fshr-512.ll
index 91dd83050e17e61..259e5a5f4711549 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-512.ll
@@ -424,34 +424,37 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt)
define <8 x i64> @splatvar_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v8i64:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512F-NEXT: vpsrlq %xmm4, %zmm1, %zmm1
-; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX512F-NEXT: vpbroadcastq %xmm2, %zmm2
+; AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
+; AVX512F-NEXT: vpandq %zmm3, %zmm2, %zmm4
+; AVX512F-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1
+; AVX512F-NEXT: vpandnq %zmm3, %zmm2, %zmm2
; AVX512F-NEXT: vpaddq %zmm0, %zmm0, %zmm0
-; AVX512F-NEXT: vpsllq %xmm2, %zmm0, %zmm0
+; AVX512F-NEXT: vpsllvq %zmm2, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v8i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
-; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VL-NEXT: vpsrlq %xmm4, %zmm1, %zmm1
-; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX512VL-NEXT: vpbroadcastq %xmm2, %zmm2
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
+; AVX512VL-NEXT: vpandq %zmm3, %zmm2, %zmm4
+; AVX512VL-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1
+; AVX512VL-NEXT: vpandnq %zmm3, %zmm2, %zmm2
; AVX512VL-NEXT: vpaddq %zmm0, %zmm0, %zmm0
-; AVX512VL-NEXT: vpsllq %xmm2, %zmm0, %zmm0
+; AVX512VL-NEXT: vpsllvq %zmm2, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v8i64:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512BW-NEXT: vpsrlq %xmm4, %zmm1, %zmm1
-; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX512BW-NEXT: vpbroadcastq %xmm2, %zmm2
+; AVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
+; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm4
+; AVX512BW-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1
+; AVX512BW-NEXT: vpandnq %zmm3, %zmm2, %zmm2
; AVX512BW-NEXT: vpaddq %zmm0, %zmm0, %zmm0
-; AVX512BW-NEXT: vpsllq %xmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsllvq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
@@ -464,12 +467,13 @@ define <8 x i64> @splatvar_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i64:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
-; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VLBW-NEXT: vpsrlq %xmm4, %zmm1, %zmm1
-; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX512VLBW-NEXT: vpbroadcastq %xmm2, %zmm2
+; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
+; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm4
+; AVX512VLBW-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1
+; AVX512VLBW-NEXT: vpandnq %zmm3, %zmm2, %zmm2
; AVX512VLBW-NEXT: vpaddq %zmm0, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpsllq %xmm2, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpsllvq %zmm2, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
@@ -582,12 +586,13 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
;
; AVX512BW-LABEL: splatvar_funnnel_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
-; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512BW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
-; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX512BW-NEXT: vpbroadcastw %xmm2, %zmm2
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm4
+; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
+; AVX512BW-NEXT: vpandnq %zmm3, %zmm2, %zmm2
; AVX512BW-NEXT: vpaddw %zmm0, %zmm0, %zmm0
-; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
@@ -600,12 +605,13 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
-; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
-; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %zmm2
+; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm4
+; AVX512VLBW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
+; AVX512VLBW-NEXT: vpandnq %zmm3, %zmm2, %zmm2
; AVX512VLBW-NEXT: vpaddw %zmm0, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
index 01578d399b774f5..50603fa03e9b23f 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
@@ -777,17 +777,30 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind
; SSE41-NEXT: por %xmm4, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: splatvar_funnnel_v2i64:
-; AVX: # %bb.0:
-; AVX-NEXT: vpmovsxbq {{.*#+}} xmm2 = [63,63]
-; AVX-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX-NEXT: vpsrlq %xmm3, %xmm0, %xmm3
-; AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX-NEXT: vpsubq %xmm1, %xmm4, %xmm1
-; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpor %xmm0, %xmm3, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_funnnel_v2i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm2 = [63,63]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vpsrlq %xmm3, %xmm0, %xmm3
+; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: vpsubq %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatvar_funnnel_v2i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm2 = [63,63]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX2-NEXT: vpsrlvq %xmm3, %xmm0, %xmm3
+; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX2-NEXT: vpsubq %xmm1, %xmm4, %xmm1
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpor %xmm0, %xmm3, %xmm0
+; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v2i64:
; AVX512F: # %bb.0:
@@ -1027,23 +1040,27 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind
;
; AVX512BW-LABEL: splatvar_funnnel_v8i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm2 = [15,0]
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512BW-NEXT: vpsrlw %xmm3, %xmm0, %xmm3
+; AVX512BW-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3
; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpaddw %xmm0, %xmm0, %xmm0
-; AVX512BW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpor %xmm3, %xmm0, %xmm0
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpmovsxbq {{.*#+}} xmm2 = [15,0]
+; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15]
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VLBW-NEXT: vpsrlw %xmm3, %xmm0, %xmm3
+; AVX512VLBW-NEXT: vpsrlvw %xmm3, %xmm0, %xmm3
; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpaddw %xmm0, %xmm0, %xmm0
-; AVX512VLBW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpsllvw %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
index 0aa91b74e12cabc..36872bc144864fc 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
@@ -618,13 +618,14 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind
;
; AVX2-LABEL: splatvar_funnnel_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm2 = [63,63]
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX2-NEXT: vpsrlq %xmm3, %ymm0, %ymm3
+; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [63,63,63,63]
+; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm3
+; AVX2-NEXT: vpsrlvq %ymm3, %ymm0, %ymm3
; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX2-NEXT: vpsubq %xmm1, %xmm4, %xmm1
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsubq %ymm1, %ymm4, %ymm1
+; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm0, %ymm3, %ymm0
; AVX2-NEXT: retq
;
@@ -845,23 +846,26 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounw
;
; AVX512BW-LABEL: splatvar_funnnel_v16i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
-; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512BW-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
-; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm1
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpand %ymm2, %ymm1, %ymm3
+; AVX512BW-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3
; AVX512BW-NEXT: vpaddw %ymm0, %ymm0, %ymm0
-; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpandn %ymm2, %ymm1, %ymm1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
-; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VLBW-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
-; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VLBW-NEXT: vpand %ymm2, %ymm1, %ymm3
+; AVX512VLBW-NEXT: vpsrlvw %ymm3, %ymm0, %ymm3
+; AVX512VLBW-NEXT: vpandn %ymm2, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpaddw %ymm0, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
index 4364c047dfdebfc..d262a866bf6f438 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
@@ -332,23 +332,25 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw
;
; AVX512BW-LABEL: splatvar_funnnel_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
-; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512BW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3
-; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm1
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %zmm1
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm3
+; AVX512BW-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3
+; AVX512BW-NEXT: vpandnq %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddw %zmm0, %zmm0, %zmm0
-; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
-; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3
-; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %zmm1
+; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm3
+; AVX512VLBW-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3
+; AVX512VLBW-NEXT: vpandnq %zmm2, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpaddw %zmm0, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll
index 2e21f8d0aa32a1d..20797cd22d57372 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-128.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll
@@ -680,14 +680,24 @@ define <2 x i64> @splatvar_rotate_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE41-NEXT: por %xmm3, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: splatvar_rotate_v2i64:
-; AVX: # %bb.0:
-; AVX-NEXT: vpmovsxbq {{.*#+}} xmm2 = [64,64]
-; AVX-NEXT: vpsubq %xmm1, %xmm2, %xmm2
-; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm1
-; AVX-NEXT: vpsrlq %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_rotate_v2i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm2 = [64,64]
+; AVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vpsrlq %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatvar_rotate_v2i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm2 = [64,64]
+; AVX2-NEXT: vpsubq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpsllvq %xmm1, %xmm0, %xmm1
+; AVX2-NEXT: vpsrlvq %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
;
; AVX512NOVLX-LABEL: splatvar_rotate_v2i64:
; AVX512NOVLX: # %bb.0:
@@ -860,23 +870,27 @@ define <8 x i16> @splatvar_rotate_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
;
; AVX512BW-LABEL: splatvar_rotate_v8i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm2 = [15,0]
-; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
-; AVX512BW-NEXT: vpsrlw $1, %xmm0, %xmm4
-; AVX512BW-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
-; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpor %xmm3, %xmm0, %xmm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpsrlw $1, %xmm0, %xmm2
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpandn %xmm3, %xmm1, %xmm4
+; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm2, %zmm2
+; AVX512BW-NEXT: vpand %xmm3, %xmm1, %xmm1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpor %xmm2, %xmm0, %xmm0
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_rotate_v8i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpmovsxbq {{.*#+}} xmm2 = [15,0]
+; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15]
; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VLBW-NEXT: vpsrlw $1, %xmm0, %xmm4
-; AVX512VLBW-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
+; AVX512VLBW-NEXT: vpsrlvw %xmm3, %xmm4, %xmm3
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpsllvw %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll
index b5f0522327a4482..a2a01a2777f7e4c 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-256.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll
@@ -531,11 +531,12 @@ define <4 x i64> @splatvar_rotate_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; AVX2-LABEL: splatvar_rotate_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm2
-; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm3 = [64,64]
-; AVX2-NEXT: vpsubq %xmm1, %xmm3, %xmm1
-; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [64,64,64,64]
+; AVX2-NEXT: vpsubq %ymm1, %ymm2, %ymm2
+; AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm1
+; AVX2-NEXT: vpsrlvq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512NOVLX-LABEL: splatvar_rotate_v4i64:
@@ -697,23 +698,26 @@ define <16 x i16> @splatvar_rotate_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
;
; AVX512BW-LABEL: splatvar_rotate_v16i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
-; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
-; AVX512BW-NEXT: vpsrlw $1, %ymm0, %ymm4
-; AVX512BW-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
-; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpor %ymm3, %ymm0, %ymm0
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512BW-NEXT: vpsrlw $1, %ymm0, %ymm2
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpandn %ymm3, %ymm1, %ymm4
+; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm2, %zmm2
+; AVX512BW-NEXT: vpand %ymm3, %ymm1, %ymm1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpor %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_rotate_v16i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
-; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VLBW-NEXT: vpandn %ymm2, %ymm1, %ymm3
; AVX512VLBW-NEXT: vpsrlw $1, %ymm0, %ymm4
-; AVX512VLBW-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
-; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpsrlvw %ymm3, %ymm4, %ymm3
+; AVX512VLBW-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-rotate-512.ll b/llvm/test/CodeGen/X86/vector-rotate-512.ll
index 29afbf4c62ef5ad..c8a5db70af8bd47 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-512.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-512.ll
@@ -347,23 +347,25 @@ define <32 x i16> @splatvar_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind
;
; AVX512BW-LABEL: splatvar_rotate_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
-; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %zmm1
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpandnq %zmm2, %zmm1, %zmm3
; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm4
-; AVX512BW-NEXT: vpsrlw %xmm3, %zmm4, %zmm3
-; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlvw %zmm3, %zmm4, %zmm3
+; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_rotate_v32i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
-; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %zmm1
+; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VLBW-NEXT: vpandnq %zmm2, %zmm1, %zmm3
; AVX512VLBW-NEXT: vpsrlw $1, %zmm0, %zmm4
-; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm4, %zmm3
-; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpsrlvw %zmm3, %zmm4, %zmm3
+; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm1
+; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
index 53b6aca3e9fcba9..006b034ec1674a1 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
@@ -645,9 +645,10 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
;
; AVX2-LABEL: splatvar_shift_v2i64:
; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
-; AVX2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsubq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: retq
@@ -671,14 +672,16 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; AVX512-LABEL: splatvar_shift_v2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512-NEXT: vpsraq %xmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v2i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsraq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX512VL-NEXT: vpsravq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v2i64:
@@ -708,28 +711,40 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE41-NEXT: psrad %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: splatvar_shift_v4i32:
-; AVX: # %bb.0:
-; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX-NEXT: vpsrad %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_shift_v4i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
;
-; XOP-LABEL: splatvar_shift_v4i32:
-; XOP: # %bb.0:
-; XOP-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; XOP-NEXT: vpsrad %xmm1, %xmm0, %xmm0
-; XOP-NEXT: retq
+; AVX2-LABEL: splatvar_shift_v4i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-NEXT: vpsravd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_shift_v4i32:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_shift_v4i32:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsravd %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v4i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512-NEXT: vpsravd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v4i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512VL-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512VL-NEXT: vpsravd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v4i32:
@@ -769,17 +784,32 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; XOP-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512-LABEL: splatvar_shift_v8i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsraw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_shift_v8i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQ-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_shift_v8i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsraw %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_shift_v8i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_shift_v8i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQVL-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_shift_v8i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpsravw %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v8i16:
; X86-SSE: # %bb.0:
@@ -872,18 +902,20 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v16i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQ-NEXT: vpsrad %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQ-NEXT: vpsravd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v16i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BW-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -891,18 +923,20 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v16i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQVL-NEXT: vpsrad %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQVL-NEXT: vpsravd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v16i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BWVL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BWVL-NEXT: vpsravw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -957,10 +991,11 @@ define <2 x i64> @splatvar_modulo_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwi
;
; AVX2-LABEL: splatvar_modulo_shift_v2i64:
; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
-; AVX2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsubq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: retq
@@ -986,16 +1021,18 @@ define <2 x i64> @splatvar_modulo_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwi
; AVX512-LABEL: splatvar_modulo_shift_v2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsraq %xmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v2i64:
; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
-; AVX512VL-NEXT: vpsraq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsravq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_modulo_shift_v2i64:
@@ -1020,28 +1057,47 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
; SSE-NEXT: psrad %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: splatvar_modulo_shift_v4i32:
-; AVX: # %bb.0:
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-NEXT: vpsrad %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_modulo_shift_v4i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
;
-; XOP-LABEL: splatvar_modulo_shift_v4i32:
-; XOP: # %bb.0:
-; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOP-NEXT: vpsrad %xmm1, %xmm0, %xmm0
-; XOP-NEXT: retq
+; AVX2-LABEL: splatvar_modulo_shift_v4i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpsravd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_modulo_shift_v4i32:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_modulo_shift_v4i32:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsravd %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v4i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpsravd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v4i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512VL-NEXT: vpsravd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_modulo_shift_v4i32:
@@ -1074,17 +1130,34 @@ define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwi
; XOP-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512-LABEL: splatvar_modulo_shift_v8i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsraw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQ-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_modulo_shift_v8i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpsraw %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpsravw %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_modulo_shift_v8i16:
; X86-SSE: # %bb.0:
@@ -1178,20 +1251,22 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
;
; AVX512DQ-LABEL: splatvar_modulo_shift_v16i8:
; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512DQ-NEXT: vpsrad %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQ-NEXT: vpsravd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_modulo_shift_v16i8:
; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -1199,20 +1274,22 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
;
; AVX512DQVL-LABEL: splatvar_modulo_shift_v16i8:
; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512DQVL-NEXT: vpsrad %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQVL-NEXT: vpsravd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_modulo_shift_v16i8:
; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BWVL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BWVL-NEXT: vpsravw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -1815,9 +1892,10 @@ define <2 x i64> @PR52719(<2 x i64> %a0, i32 %a1) {
; AVX2-LABEL: PR52719:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm1
+; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
-; AVX2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsubq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: retq
@@ -1844,15 +1922,18 @@ define <2 x i64> @PR52719(<2 x i64> %a0, i32 %a1) {
; AVX512: # %bb.0:
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512-NEXT: vmovd %edi, %xmm1
-; AVX512-NEXT: vpsraq %xmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: PR52719:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovd %edi, %xmm1
-; AVX512VL-NEXT: vpsraq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpbroadcastd %edi, %xmm1
+; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512VL-NEXT: vpsravq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: PR52719:
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
index 9a483c345f92cb1..186e3dc12c5137a 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -670,9 +670,10 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; AVX2-LABEL: splatvar_shift_v4i64:
; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
-; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
+; AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
@@ -690,9 +691,10 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; XOPAVX2-LABEL: splatvar_shift_v4i64:
; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastq %xmm1, %ymm1
; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
-; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
+; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; XOPAVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
@@ -700,13 +702,15 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX512-LABEL: splatvar_shift_v4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512-NEXT: vpsraq %xmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v4i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsraq %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX512VL-NEXT: vpsravq %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_shift_v4i64:
@@ -726,9 +730,10 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; X86-AVX2-LABEL: splatvar_shift_v4i64:
; X86-AVX2: # %bb.0:
+; X86-AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
-; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
-; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
+; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
@@ -749,8 +754,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; AVX2-LABEL: splatvar_shift_v8i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX2-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v8i32:
@@ -764,20 +769,20 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; XOPAVX2-LABEL: splatvar_shift_v8i32:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; XOPAVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; XOPAVX2-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v8i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX512-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v8i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512VL-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX512VL-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_shift_v8i32:
@@ -791,8 +796,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; X86-AVX2-LABEL: splatvar_shift_v8i32:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; X86-AVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; X86-AVX2-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
%shift = ashr <8 x i32> %a, %splat
@@ -830,17 +835,31 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
; XOPAVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
-; AVX512-LABEL: splatvar_shift_v16i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsraw %xmm1, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_shift_v16i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQ-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_shift_v16i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_shift_v16i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_shift_v16i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQVL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_shift_v16i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512BWVL-NEXT: vpsravw %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_shift_v16i16:
; X86-AVX1: # %bb.0:
@@ -936,9 +955,10 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
;
; AVX512BW-LABEL: splatvar_shift_v32i8:
; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
;
@@ -958,9 +978,10 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
;
; AVX512BWVL-LABEL: splatvar_shift_v32i8:
; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512BWVL-NEXT: vpmovsxbw %ymm0, %zmm0
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BWVL-NEXT: vpsraw %xmm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512BWVL-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT: retq
;
@@ -1026,10 +1047,12 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
;
; AVX2-LABEL: splatvar_modulo_shift_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
-; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
+; AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
@@ -1048,10 +1071,12 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
;
; XOPAVX2-LABEL: splatvar_modulo_shift_v4i64:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpbroadcastq %xmm1, %ymm1
; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
-; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
+; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; XOPAVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
@@ -1059,15 +1084,18 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
; AVX512-LABEL: splatvar_modulo_shift_v4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsraq %xmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
-; AVX512VL-NEXT: vpsraq %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX512VL-NEXT: vpsravq %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v4i64:
@@ -1089,9 +1117,10 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
; X86-AVX2-LABEL: splatvar_modulo_shift_v4i64:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
+; X86-AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
-; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
-; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
+; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
@@ -1113,8 +1142,10 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; AVX2-LABEL: splatvar_modulo_shift_v8i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX2-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v8i32:
@@ -1128,20 +1159,25 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; XOPAVX2-LABEL: splatvar_modulo_shift_v8i32:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; XOPAVX2-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v8i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX512-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v8i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512VL-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX512VL-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v8i32:
@@ -1155,8 +1191,10 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; X86-AVX2-LABEL: splatvar_modulo_shift_v8i32:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
-; X86-AVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; X86-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; X86-AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; X86-AVX2-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
%splat = shufflevector <8 x i32> %mod, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -1195,17 +1233,33 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; XOPAVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
-; AVX512-LABEL: splatvar_modulo_shift_v16i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsraw %xmm1, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQ-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_modulo_shift_v16i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512BWVL-NEXT: vpsravw %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v16i16:
; X86-AVX1: # %bb.0:
@@ -1304,10 +1358,11 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
;
; AVX512BW-LABEL: splatvar_modulo_shift_v32i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
;
@@ -1327,10 +1382,11 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
;
; AVX512BWVL-LABEL: splatvar_modulo_shift_v32i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpmovsxbw %ymm0, %zmm0
; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BWVL-NEXT: vpsraw %xmm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX512BWVL-NEXT: vpmovsxbw %ymm0, %zmm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512BWVL-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT: retq
;
@@ -2134,9 +2190,11 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
; AVX2-LABEL: PR52719:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm1
+; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
-; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
+; AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
@@ -2159,9 +2217,11 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
; XOPAVX2-LABEL: PR52719:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vmovd %edi, %xmm1
+; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
-; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
+; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; XOPAVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
@@ -2170,14 +2230,17 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
; AVX512: # %bb.0:
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vmovd %edi, %xmm1
-; AVX512-NEXT: vpsraq %xmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: PR52719:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovd %edi, %xmm1
-; AVX512VL-NEXT: vpsraq %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpbroadcastd %edi, %xmm1
+; AVX512VL-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; AVX512VL-NEXT: vpsravq %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: PR52719:
@@ -2198,10 +2261,11 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
;
; X86-AVX2-LABEL: PR52719:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %xmm1
+; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
-; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
-; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
+; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
index a2fe36e72f6b9c9..796c05d4d75ec62 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
@@ -148,7 +148,8 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_shift_v8i64:
; ALL: # %bb.0:
-; ALL-NEXT: vpsraq %xmm1, %zmm0, %zmm0
+; ALL-NEXT: vpbroadcastq %xmm1, %zmm1
+; ALL-NEXT: vpsravq %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
%shift = ashr <8 x i64> %a, %splat
@@ -158,8 +159,8 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_shift_v16i32:
; ALL: # %bb.0:
-; ALL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; ALL-NEXT: vpsrad %xmm1, %zmm0, %zmm0
+; ALL-NEXT: vpbroadcastd %xmm1, %zmm1
+; ALL-NEXT: vpsravd %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
%shift = ashr <16 x i32> %a, %splat
@@ -178,8 +179,8 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind
;
; AVX512BW-LABEL: splatvar_shift_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %zmm1
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
%shift = ashr <32 x i16> %a, %splat
@@ -233,8 +234,10 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_modulo_shift_v8i64:
; ALL: # %bb.0:
-; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; ALL-NEXT: vpsraq %xmm1, %zmm0, %zmm0
+; ALL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
+; ALL-NEXT: vpbroadcastq %xmm1, %zmm1
+; ALL-NEXT: vpsravq %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%mod = and <8 x i64> %b, <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>
%splat = shufflevector <8 x i64> %mod, <8 x i64> undef, <8 x i32> zeroinitializer
@@ -245,8 +248,10 @@ define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwi
define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_modulo_shift_v16i32:
; ALL: # %bb.0:
-; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; ALL-NEXT: vpsrad %xmm1, %zmm0, %zmm0
+; ALL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
+; ALL-NEXT: vpbroadcastd %xmm1, %zmm1
+; ALL-NEXT: vpsravd %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%mod = and <16 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
%splat = shufflevector <16 x i32> %mod, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -267,7 +272,8 @@ define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) no
; AVX512BW-LABEL: splatvar_modulo_shift_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %zmm1
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%mod = and <32 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%splat = shufflevector <32 x i16> %mod, <32 x i16> undef, <32 x i32> zeroinitializer
@@ -495,7 +501,9 @@ define <8 x i64> @PR52719(<8 x i64> %a0, i32 %a1) {
; ALL-LABEL: PR52719:
; ALL: # %bb.0:
; ALL-NEXT: vmovd %edi, %xmm1
-; ALL-NEXT: vpsraq %xmm1, %zmm0, %zmm0
+; ALL-NEXT: vpbroadcastd %xmm1, %ymm1
+; ALL-NEXT: vpmovzxdq {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero
+; ALL-NEXT: vpsravq %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%vec = insertelement <8 x i32> poison, i32 %a1, i64 0
%splat = shufflevector <8 x i32> %vec, <8 x i32> poison, <8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
index 36a6226f8f4b9c2..41c900b1f6879c1 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
@@ -1167,28 +1167,40 @@ define <2 x i32> @splatvar_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
; SSE41-NEXT: psrad %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: splatvar_shift_v2i32:
-; AVX: # %bb.0:
-; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX-NEXT: vpsrad %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_shift_v2i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
;
-; XOP-LABEL: splatvar_shift_v2i32:
-; XOP: # %bb.0:
-; XOP-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; XOP-NEXT: vpsrad %xmm1, %xmm0, %xmm0
-; XOP-NEXT: retq
+; AVX2-LABEL: splatvar_shift_v2i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-NEXT: vpsravd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_shift_v2i32:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_shift_v2i32:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsravd %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v2i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512-NEXT: vpsravd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v2i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512VL-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512VL-NEXT: vpsravd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v2i32:
@@ -1228,17 +1240,32 @@ define <4 x i16> @splatvar_shift_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
; XOP-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512-LABEL: splatvar_shift_v4i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsraw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_shift_v4i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQ-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_shift_v4i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsraw %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_shift_v4i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_shift_v4i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQVL-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_shift_v4i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpsravw %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v4i16:
; X86-SSE: # %bb.0:
@@ -1277,17 +1304,32 @@ define <2 x i16> @splatvar_shift_v2i16(<2 x i16> %a, <2 x i16> %b) nounwind {
; XOP-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512-LABEL: splatvar_shift_v2i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsraw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_shift_v2i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQ-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_shift_v2i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsraw %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_shift_v2i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_shift_v2i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQVL-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_shift_v2i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpsravw %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v2i16:
; X86-SSE: # %bb.0:
@@ -1381,18 +1423,20 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v8i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQ-NEXT: vpsrad %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQ-NEXT: vpsravd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v8i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BW-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -1400,18 +1444,20 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v8i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQVL-NEXT: vpsrad %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQVL-NEXT: vpsravd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v8i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BWVL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BWVL-NEXT: vpsravw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -1519,18 +1565,20 @@ define <4 x i8> @splatvar_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v4i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQ-NEXT: vpsrad %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQ-NEXT: vpsravd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v4i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BW-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -1538,18 +1586,20 @@ define <4 x i8> @splatvar_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v4i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQVL-NEXT: vpsrad %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQVL-NEXT: vpsravd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v4i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BWVL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BWVL-NEXT: vpsravw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -1648,18 +1698,20 @@ define <2 x i8> @splatvar_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v2i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQ-NEXT: vpsrad %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQ-NEXT: vpsravd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v2i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BW-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -1667,18 +1719,20 @@ define <2 x i8> @splatvar_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v2i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQVL-NEXT: vpsrad %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQVL-NEXT: vpsravd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v2i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BWVL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BWVL-NEXT: vpsravw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
index ca8343cd4812cdd..a8c48d3f658c9ca 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
@@ -520,24 +520,38 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE-NEXT: psrlq %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: splatvar_shift_v2i64:
-; AVX: # %bb.0:
-; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_shift_v2i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
;
-; XOP-LABEL: splatvar_shift_v2i64:
-; XOP: # %bb.0:
-; XOP-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
-; XOP-NEXT: retq
+; AVX2-LABEL: splatvar_shift_v2i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_shift_v2i64:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_shift_v2i64:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastq %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v2i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX512-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v2i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX512VL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v2i64:
@@ -563,28 +577,40 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE41-NEXT: psrld %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: splatvar_shift_v4i32:
-; AVX: # %bb.0:
-; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX-NEXT: vpsrld %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_shift_v4i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
;
-; XOP-LABEL: splatvar_shift_v4i32:
-; XOP: # %bb.0:
-; XOP-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; XOP-NEXT: vpsrld %xmm1, %xmm0, %xmm0
-; XOP-NEXT: retq
+; AVX2-LABEL: splatvar_shift_v4i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_shift_v4i32:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_shift_v4i32:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v4i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v4i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512VL-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512VL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v4i32:
@@ -624,17 +650,32 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; XOP-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512-LABEL: splatvar_shift_v8i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_shift_v8i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQ-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_shift_v8i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_shift_v8i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_shift_v8i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQVL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_shift_v8i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v8i16:
; X86-SSE: # %bb.0:
@@ -711,18 +752,20 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v16i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQ-NEXT: vpsrld %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQ-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v16i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -730,18 +773,20 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v16i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpsrld %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQVL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v16i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BWVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BWVL-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -775,28 +820,44 @@ define <2 x i64> @splatvar_modulo_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwi
; SSE-NEXT: psrlq %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: splatvar_modulo_shift_v2i64:
-; AVX: # %bb.0:
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_modulo_shift_v2i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
;
-; XOP-LABEL: splatvar_modulo_shift_v2i64:
-; XOP: # %bb.0:
-; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOP-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
-; XOP-NEXT: retq
+; AVX2-LABEL: splatvar_modulo_shift_v2i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_modulo_shift_v2i64:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_modulo_shift_v2i64:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastq %xmm1, %xmm1
+; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v2i64:
; AVX512: # %bb.0:
+; AVX512-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v2i64:
; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
-; AVX512VL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_modulo_shift_v2i64:
@@ -817,28 +878,47 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
; SSE-NEXT: psrld %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: splatvar_modulo_shift_v4i32:
-; AVX: # %bb.0:
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-NEXT: vpsrld %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_modulo_shift_v4i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
;
-; XOP-LABEL: splatvar_modulo_shift_v4i32:
-; XOP: # %bb.0:
-; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOP-NEXT: vpsrld %xmm1, %xmm0, %xmm0
-; XOP-NEXT: retq
+; AVX2-LABEL: splatvar_modulo_shift_v4i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_modulo_shift_v4i32:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_modulo_shift_v4i32:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v4i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v4i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512VL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_modulo_shift_v4i32:
@@ -871,17 +951,34 @@ define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwi
; XOP-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512-LABEL: splatvar_modulo_shift_v8i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQ-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_modulo_shift_v8i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_modulo_shift_v8i16:
; X86-SSE: # %bb.0:
@@ -959,20 +1056,22 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
;
; AVX512DQ-LABEL: splatvar_modulo_shift_v16i8:
; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512DQ-NEXT: vpsrld %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQ-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_modulo_shift_v16i8:
; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -980,20 +1079,22 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
;
; AVX512DQVL-LABEL: splatvar_modulo_shift_v16i8:
; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512DQVL-NEXT: vpsrld %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQVL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_modulo_shift_v16i8:
; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BWVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BWVL-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
index e65f78e49dc8d27..6cc1261defbe82f 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
@@ -534,7 +534,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; AVX2-LABEL: splatvar_shift_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v4i64:
@@ -547,17 +548,20 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; XOPAVX2-LABEL: splatvar_shift_v4i64:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v4i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX512-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v4i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX512VL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_shift_v4i64:
@@ -570,7 +574,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; X86-AVX2-LABEL: splatvar_shift_v4i64:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
%shift = lshr <4 x i64> %a, %splat
@@ -589,8 +594,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; AVX2-LABEL: splatvar_shift_v8i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v8i32:
@@ -604,20 +609,20 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; XOPAVX2-LABEL: splatvar_shift_v8i32:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; XOPAVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; XOPAVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v8i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX512-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v8i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512VL-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX512VL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_shift_v8i32:
@@ -631,8 +636,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; X86-AVX2-LABEL: splatvar_shift_v8i32:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; X86-AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; X86-AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
%shift = lshr <8 x i32> %a, %splat
@@ -670,17 +675,31 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
; XOPAVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
-; AVX512-LABEL: splatvar_shift_v16i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_shift_v16i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_shift_v16i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_shift_v16i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_shift_v16i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_shift_v16i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512BWVL-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_shift_v16i16:
; X86-AVX1: # %bb.0:
@@ -762,9 +781,10 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
;
; AVX512BW-LABEL: splatvar_shift_v32i8:
; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
;
@@ -781,9 +801,10 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
;
; AVX512BWVL-LABEL: splatvar_shift_v32i8:
; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BWVL-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512BWVL-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT: retq
;
@@ -832,8 +853,10 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
;
; AVX2-LABEL: splatvar_modulo_shift_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v4i64:
@@ -847,20 +870,25 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
;
; XOPAVX2-LABEL: splatvar_modulo_shift_v4i64:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v4i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX512-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
-; AVX512VL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX512VL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v4i64:
@@ -875,7 +903,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
; X86-AVX2-LABEL: splatvar_modulo_shift_v4i64:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
-; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <4 x i64> %b, <i64 63, i64 63, i64 63, i64 63>
%splat = shufflevector <4 x i64> %mod, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -895,8 +924,10 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; AVX2-LABEL: splatvar_modulo_shift_v8i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v8i32:
@@ -910,20 +941,25 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; XOPAVX2-LABEL: splatvar_modulo_shift_v8i32:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; XOPAVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v8i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX512-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v8i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512VL-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX512VL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v8i32:
@@ -937,8 +973,10 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; X86-AVX2-LABEL: splatvar_modulo_shift_v8i32:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
-; X86-AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; X86-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; X86-AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; X86-AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
%splat = shufflevector <8 x i32> %mod, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -977,17 +1015,33 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; XOPAVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
-; AVX512-LABEL: splatvar_modulo_shift_v16i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_modulo_shift_v16i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512BWVL-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v16i16:
; X86-AVX1: # %bb.0:
@@ -1072,10 +1126,11 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
;
; AVX512BW-LABEL: splatvar_modulo_shift_v32i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
;
@@ -1092,10 +1147,11 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
;
; AVX512BWVL-LABEL: splatvar_modulo_shift_v32i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BWVL-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512BWVL-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
index efd73b4ca132bbf..1fbd0ea9b9f1573 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
@@ -112,7 +112,8 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_shift_v8i64:
; ALL: # %bb.0:
-; ALL-NEXT: vpsrlq %xmm1, %zmm0, %zmm0
+; ALL-NEXT: vpbroadcastq %xmm1, %zmm1
+; ALL-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
%shift = lshr <8 x i64> %a, %splat
@@ -122,8 +123,8 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_shift_v16i32:
; ALL: # %bb.0:
-; ALL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; ALL-NEXT: vpsrld %xmm1, %zmm0, %zmm0
+; ALL-NEXT: vpbroadcastd %xmm1, %zmm1
+; ALL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
%shift = lshr <16 x i32> %a, %splat
@@ -142,8 +143,8 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind
;
; AVX512BW-LABEL: splatvar_shift_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %zmm1
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
%shift = lshr <32 x i16> %a, %splat
@@ -188,8 +189,10 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_modulo_shift_v8i64:
; ALL: # %bb.0:
-; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; ALL-NEXT: vpsrlq %xmm1, %zmm0, %zmm0
+; ALL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
+; ALL-NEXT: vpbroadcastq %xmm1, %zmm1
+; ALL-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%mod = and <8 x i64> %b, <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>
%splat = shufflevector <8 x i64> %mod, <8 x i64> undef, <8 x i32> zeroinitializer
@@ -200,8 +203,10 @@ define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwi
define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_modulo_shift_v16i32:
; ALL: # %bb.0:
-; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; ALL-NEXT: vpsrld %xmm1, %zmm0, %zmm0
+; ALL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
+; ALL-NEXT: vpbroadcastd %xmm1, %zmm1
+; ALL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%mod = and <16 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
%splat = shufflevector <16 x i32> %mod, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -222,7 +227,8 @@ define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) no
; AVX512BW-LABEL: splatvar_modulo_shift_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %zmm1
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%mod = and <32 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%splat = shufflevector <32 x i16> %mod, <32 x i16> undef, <32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
index 74ba1d04161f87b..108e383dd7a029f 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
@@ -933,28 +933,40 @@ define <2 x i32> @splatvar_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
; SSE41-NEXT: psrld %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: splatvar_shift_v2i32:
-; AVX: # %bb.0:
-; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX-NEXT: vpsrld %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_shift_v2i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
;
-; XOP-LABEL: splatvar_shift_v2i32:
-; XOP: # %bb.0:
-; XOP-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; XOP-NEXT: vpsrld %xmm1, %xmm0, %xmm0
-; XOP-NEXT: retq
+; AVX2-LABEL: splatvar_shift_v2i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_shift_v2i32:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_shift_v2i32:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v2i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v2i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512VL-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512VL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v2i32:
@@ -994,17 +1006,32 @@ define <4 x i16> @splatvar_shift_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
; XOP-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512-LABEL: splatvar_shift_v4i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_shift_v4i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQ-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_shift_v4i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_shift_v4i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_shift_v4i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQVL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_shift_v4i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v4i16:
; X86-SSE: # %bb.0:
@@ -1043,17 +1070,32 @@ define <2 x i16> @splatvar_shift_v2i16(<2 x i16> %a, <2 x i16> %b) nounwind {
; XOP-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512-LABEL: splatvar_shift_v2i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_shift_v2i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQ-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_shift_v2i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_shift_v2i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_shift_v2i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQVL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_shift_v2i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v2i16:
; X86-SSE: # %bb.0:
@@ -1131,18 +1173,20 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v8i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQ-NEXT: vpsrld %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQ-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v8i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -1150,18 +1194,20 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v8i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpsrld %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQVL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v8i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BWVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BWVL-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -1249,18 +1295,20 @@ define <4 x i8> @splatvar_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v4i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQ-NEXT: vpsrld %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQ-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v4i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -1268,18 +1316,20 @@ define <4 x i8> @splatvar_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v4i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpsrld %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQVL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v4i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BWVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BWVL-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -1358,18 +1408,20 @@ define <2 x i8> @splatvar_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v2i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQ-NEXT: vpsrld %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQ-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v2i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -1377,18 +1429,20 @@ define <2 x i8> @splatvar_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v2i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpsrld %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQVL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v2i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BWVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BWVL-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
index c54da38ef10cc18..4b3b35e9fee8752 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
@@ -431,24 +431,38 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE-NEXT: psllq %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: splatvar_shift_v2i64:
-; AVX: # %bb.0:
-; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_shift_v2i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
;
-; XOP-LABEL: splatvar_shift_v2i64:
-; XOP: # %bb.0:
-; XOP-NEXT: vpsllq %xmm1, %xmm0, %xmm0
-; XOP-NEXT: retq
+; AVX2-LABEL: splatvar_shift_v2i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX2-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_shift_v2i64:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_shift_v2i64:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastq %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v2i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX512-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v2i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX512VL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v2i64:
@@ -474,28 +488,40 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE41-NEXT: pslld %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: splatvar_shift_v4i32:
-; AVX: # %bb.0:
-; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX-NEXT: vpslld %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_shift_v4i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
;
-; XOP-LABEL: splatvar_shift_v4i32:
-; XOP: # %bb.0:
-; XOP-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; XOP-NEXT: vpslld %xmm1, %xmm0, %xmm0
-; XOP-NEXT: retq
+; AVX2-LABEL: splatvar_shift_v4i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_shift_v4i32:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_shift_v4i32:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v4i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v4i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512VL-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512VL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v4i32:
@@ -535,17 +561,32 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; XOP-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512-LABEL: splatvar_shift_v8i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_shift_v8i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQ-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_shift_v8i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_shift_v8i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_shift_v8i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQVL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_shift_v8i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpsllvw %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v8i16:
; X86-SSE: # %bb.0:
@@ -619,18 +660,20 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v16i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQ-NEXT: vpslld %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v16i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -638,18 +681,20 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v16i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpslld %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v16i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BWVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -682,28 +727,44 @@ define <2 x i64> @splatvar_modulo_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwi
; SSE-NEXT: psllq %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: splatvar_modulo_shift_v2i64:
-; AVX: # %bb.0:
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_modulo_shift_v2i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
;
-; XOP-LABEL: splatvar_modulo_shift_v2i64:
-; XOP: # %bb.0:
-; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOP-NEXT: vpsllq %xmm1, %xmm0, %xmm0
-; XOP-NEXT: retq
+; AVX2-LABEL: splatvar_modulo_shift_v2i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_modulo_shift_v2i64:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_modulo_shift_v2i64:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastq %xmm1, %xmm1
+; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v2i64:
; AVX512: # %bb.0:
+; AVX512-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v2i64:
; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
-; AVX512VL-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_modulo_shift_v2i64:
@@ -724,28 +785,47 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
; SSE-NEXT: pslld %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: splatvar_modulo_shift_v4i32:
-; AVX: # %bb.0:
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-NEXT: vpslld %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_modulo_shift_v4i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
;
-; XOP-LABEL: splatvar_modulo_shift_v4i32:
-; XOP: # %bb.0:
-; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOP-NEXT: vpslld %xmm1, %xmm0, %xmm0
-; XOP-NEXT: retq
+; AVX2-LABEL: splatvar_modulo_shift_v4i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_modulo_shift_v4i32:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_modulo_shift_v4i32:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v4i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v4i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512VL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_modulo_shift_v4i32:
@@ -778,17 +858,34 @@ define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwi
; XOP-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512-LABEL: splatvar_modulo_shift_v8i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQ-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_modulo_shift_v8i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpsllvw %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_modulo_shift_v8i16:
; X86-SSE: # %bb.0:
@@ -863,20 +960,22 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
;
; AVX512DQ-LABEL: splatvar_modulo_shift_v16i8:
; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512DQ-NEXT: vpslld %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_modulo_shift_v16i8:
; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -884,20 +983,22 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
;
; AVX512DQVL-LABEL: splatvar_modulo_shift_v16i8:
; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512DQVL-NEXT: vpslld %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_modulo_shift_v16i8:
; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BWVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
index 6dde209e94d8114..9f0b8a079517bd2 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
@@ -464,7 +464,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; AVX2-LABEL: splatvar_shift_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v4i64:
@@ -477,17 +478,20 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; XOPAVX2-LABEL: splatvar_shift_v4i64:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; XOPAVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v4i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX512-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v4i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX512VL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_shift_v4i64:
@@ -500,7 +504,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; X86-AVX2-LABEL: splatvar_shift_v4i64:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; X86-AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
%shift = shl <4 x i64> %a, %splat
@@ -519,8 +524,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; AVX2-LABEL: splatvar_shift_v8i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v8i32:
@@ -534,20 +539,20 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; XOPAVX2-LABEL: splatvar_shift_v8i32:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; XOPAVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; XOPAVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v8i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX512-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v8i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512VL-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX512VL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_shift_v8i32:
@@ -561,8 +566,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; X86-AVX2-LABEL: splatvar_shift_v8i32:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; X86-AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; X86-AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
%shift = shl <8 x i32> %a, %splat
@@ -600,17 +605,31 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
; XOPAVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
-; AVX512-LABEL: splatvar_shift_v16i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_shift_v16i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_shift_v16i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_shift_v16i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_shift_v16i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_shift_v16i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_shift_v16i16:
; X86-AVX1: # %bb.0:
@@ -688,9 +707,10 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
;
; AVX512BW-LABEL: splatvar_shift_v32i8:
; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
;
@@ -706,9 +726,10 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
;
; AVX512BWVL-LABEL: splatvar_shift_v32i8:
; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BWVL-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512BWVL-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT: retq
;
@@ -757,8 +778,10 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
;
; AVX2-LABEL: splatvar_modulo_shift_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v4i64:
@@ -772,20 +795,25 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
;
; XOPAVX2-LABEL: splatvar_modulo_shift_v4i64:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; XOPAVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v4i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX512-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
-; AVX512VL-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX512VL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v4i64:
@@ -800,7 +828,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
; X86-AVX2-LABEL: splatvar_modulo_shift_v4i64:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
-; X86-AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; X86-AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <4 x i64> %b, <i64 63, i64 63, i64 63, i64 63>
%splat = shufflevector <4 x i64> %mod, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -820,8 +849,10 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; AVX2-LABEL: splatvar_modulo_shift_v8i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v8i32:
@@ -835,20 +866,25 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; XOPAVX2-LABEL: splatvar_modulo_shift_v8i32:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; XOPAVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v8i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX512-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v8i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512VL-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX512VL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v8i32:
@@ -862,8 +898,10 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; X86-AVX2-LABEL: splatvar_modulo_shift_v8i32:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
-; X86-AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; X86-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; X86-AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; X86-AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
%splat = shufflevector <8 x i32> %mod, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -902,17 +940,33 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; XOPAVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
-; AVX512-LABEL: splatvar_modulo_shift_v16i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_modulo_shift_v16i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v16i16:
; X86-AVX1: # %bb.0:
@@ -993,10 +1047,11 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
;
; AVX512BW-LABEL: splatvar_modulo_shift_v32i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
;
@@ -1012,10 +1067,11 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
;
; AVX512BWVL-LABEL: splatvar_modulo_shift_v32i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BWVL-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512BWVL-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-512.ll b/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
index 8eca56d099feb38..bc03821cea22e11 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
@@ -107,7 +107,8 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_shift_v8i64:
; ALL: # %bb.0:
-; ALL-NEXT: vpsllq %xmm1, %zmm0, %zmm0
+; ALL-NEXT: vpbroadcastq %xmm1, %zmm1
+; ALL-NEXT: vpsllvq %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
%shift = shl <8 x i64> %a, %splat
@@ -117,8 +118,8 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_shift_v16i32:
; ALL: # %bb.0:
-; ALL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; ALL-NEXT: vpslld %xmm1, %zmm0, %zmm0
+; ALL-NEXT: vpbroadcastd %xmm1, %zmm1
+; ALL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
%shift = shl <16 x i32> %a, %splat
@@ -137,8 +138,8 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind
;
; AVX512BW-LABEL: splatvar_shift_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %zmm1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
%shift = shl <32 x i16> %a, %splat
@@ -181,8 +182,10 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_modulo_shift_v8i64:
; ALL: # %bb.0:
-; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; ALL-NEXT: vpsllq %xmm1, %zmm0, %zmm0
+; ALL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
+; ALL-NEXT: vpbroadcastq %xmm1, %zmm1
+; ALL-NEXT: vpsllvq %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%mod = and <8 x i64> %b, <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>
%splat = shufflevector <8 x i64> %mod, <8 x i64> undef, <8 x i32> zeroinitializer
@@ -193,8 +196,10 @@ define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwi
define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_modulo_shift_v16i32:
; ALL: # %bb.0:
-; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; ALL-NEXT: vpslld %xmm1, %zmm0, %zmm0
+; ALL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
+; ALL-NEXT: vpbroadcastd %xmm1, %zmm1
+; ALL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%mod = and <16 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
%splat = shufflevector <16 x i32> %mod, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -215,7 +220,8 @@ define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) no
; AVX512BW-LABEL: splatvar_modulo_shift_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %zmm1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%mod = and <32 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%splat = shufflevector <32 x i16> %mod, <32 x i16> undef, <32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll
index d545cb77cba2e49..08150335b63bb3c 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll
@@ -786,28 +786,40 @@ define <2 x i32> @splatvar_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
; SSE41-NEXT: pslld %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: splatvar_shift_v2i32:
-; AVX: # %bb.0:
-; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX-NEXT: vpslld %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_shift_v2i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
;
-; XOP-LABEL: splatvar_shift_v2i32:
-; XOP: # %bb.0:
-; XOP-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; XOP-NEXT: vpslld %xmm1, %xmm0, %xmm0
-; XOP-NEXT: retq
+; AVX2-LABEL: splatvar_shift_v2i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_shift_v2i32:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_shift_v2i32:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v2i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v2i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512VL-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512VL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v2i32:
@@ -847,17 +859,32 @@ define <4 x i16> @splatvar_shift_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
; XOP-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512-LABEL: splatvar_shift_v4i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_shift_v4i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQ-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_shift_v4i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_shift_v4i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_shift_v4i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQVL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_shift_v4i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpsllvw %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v4i16:
; X86-SSE: # %bb.0:
@@ -896,17 +923,32 @@ define <2 x i16> @splatvar_shift_v2i16(<2 x i16> %a, <2 x i16> %b) nounwind {
; XOP-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512-LABEL: splatvar_shift_v2i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_shift_v2i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQ-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_shift_v2i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_shift_v2i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_shift_v2i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQVL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_shift_v2i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpsllvw %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v2i16:
; X86-SSE: # %bb.0:
@@ -980,18 +1022,20 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v8i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQ-NEXT: vpslld %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v8i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -999,18 +1043,20 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v8i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpslld %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v8i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BWVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -1093,18 +1139,20 @@ define <4 x i8> @splatvar_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v4i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQ-NEXT: vpslld %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v4i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -1112,18 +1160,20 @@ define <4 x i8> @splatvar_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v4i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpslld %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v4i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BWVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -1199,18 +1249,20 @@ define <2 x i8> @splatvar_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v2i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQ-NEXT: vpslld %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v2i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -1218,18 +1270,20 @@ define <2 x i8> @splatvar_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v2i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpslld %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v2i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BWVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vselect-avx.ll b/llvm/test/CodeGen/X86/vselect-avx.ll
index 8dda27145bd374e..ee3c0da25537692 100644
--- a/llvm/test/CodeGen/X86/vselect-avx.ll
+++ b/llvm/test/CodeGen/X86/vselect-avx.ll
@@ -227,10 +227,10 @@ define void @blendv_split(ptr %p, <8 x i32> %cond, <8 x i32> %a, <8 x i32> %x, <
;
; AVX2-LABEL: blendv_split:
; AVX2: ## %bb.0:
-; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX2-NEXT: vpslld %xmm2, %ymm1, %ymm2
-; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero
-; AVX2-NEXT: vpslld %xmm3, %ymm1, %ymm1
+; AVX2-NEXT: vpbroadcastd %xmm2, %ymm2
+; AVX2-NEXT: vpbroadcastd %xmm3, %ymm3
+; AVX2-NEXT: vpsllvd %ymm2, %ymm1, %ymm2
+; AVX2-NEXT: vpsllvd %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT: vmovups %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
@@ -241,10 +241,10 @@ define void @blendv_split(ptr %p, <8 x i32> %cond, <8 x i32> %a, <8 x i32> %x, <
; AVX512-NEXT: vpsrld $31, %ymm0, %ymm0
; AVX512-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512-NEXT: vptestmd %ymm0, %ymm0, %k1
-; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero
-; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero
-; AVX512-NEXT: vpslld %xmm2, %ymm1, %ymm2
-; AVX512-NEXT: vpslld %xmm0, %ymm1, %ymm2 {%k1}
+; AVX512-NEXT: vpbroadcastd %xmm2, %ymm0
+; AVX512-NEXT: vpbroadcastd %xmm3, %ymm2
+; AVX512-NEXT: vpsllvd %ymm2, %ymm1, %ymm2
+; AVX512-NEXT: vpsllvd %ymm0, %ymm1, %ymm2 {%k1}
; AVX512-NEXT: vmovdqu %ymm2, (%rdi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
>From 036aa927d5e2fba9773d86af8fb4e9fec238b03e Mon Sep 17 00:00:00 2001
From: SahilPatidar <patidarsahil2001 at gmail.com>
Date: Tue, 9 Apr 2024 12:37:24 +0530
Subject: [PATCH 2/3] Fix logic code and update tests
---
llvm/lib/Target/X86/X86.td | 2 +-
llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +-
llvm/test/CodeGen/X86/avx2-vector-shifts.ll | 4 +-
llvm/test/CodeGen/X86/vector-fshl-128.ll | 102 +++----
llvm/test/CodeGen/X86/vector-fshl-256.ll | 89 +++---
llvm/test/CodeGen/X86/vector-fshl-512.ll | 66 ++---
llvm/test/CodeGen/X86/vector-fshl-rot-128.ll | 59 ++--
llvm/test/CodeGen/X86/vector-fshl-rot-256.ll | 40 ++-
llvm/test/CodeGen/X86/vector-fshl-rot-512.ll | 22 +-
llvm/test/CodeGen/X86/vector-fshr-128.ll | 100 +++----
llvm/test/CodeGen/X86/vector-fshr-256.ll | 89 +++---
llvm/test/CodeGen/X86/vector-fshr-512.ll | 66 ++---
llvm/test/CodeGen/X86/vector-fshr-rot-128.ll | 51 ++--
llvm/test/CodeGen/X86/vector-fshr-rot-256.ll | 36 +--
llvm/test/CodeGen/X86/vector-fshr-rot-512.ll | 22 +-
llvm/test/CodeGen/X86/vector-rotate-128.ll | 50 ++--
llvm/test/CodeGen/X86/vector-rotate-256.ll | 38 ++-
llvm/test/CodeGen/X86/vector-rotate-512.ll | 22 +-
.../test/CodeGen/X86/vector-shift-ashr-128.ll | 235 +++++----------
.../test/CodeGen/X86/vector-shift-ashr-256.ll | 222 +++++---------
.../test/CodeGen/X86/vector-shift-ashr-512.ll | 30 +-
.../CodeGen/X86/vector-shift-ashr-sub128.ll | 170 ++++-------
.../test/CodeGen/X86/vector-shift-lshr-128.ll | 273 ++++++------------
.../test/CodeGen/X86/vector-shift-lshr-256.ll | 182 ++++--------
.../test/CodeGen/X86/vector-shift-lshr-512.ll | 26 +-
.../CodeGen/X86/vector-shift-lshr-sub128.ll | 170 ++++-------
llvm/test/CodeGen/X86/vector-shift-shl-128.ll | 273 ++++++------------
llvm/test/CodeGen/X86/vector-shift-shl-256.ll | 182 ++++--------
llvm/test/CodeGen/X86/vector-shift-shl-512.ll | 26 +-
.../CodeGen/X86/vector-shift-shl-sub128.ll | 170 ++++-------
llvm/test/CodeGen/X86/vselect-avx.ll | 16 +-
31 files changed, 1017 insertions(+), 1818 deletions(-)
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index a9f3ae1f847552e..b93241e4317722c 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -601,7 +601,7 @@ def TuningPreferShiftShuffle : SubtargetFeature<"faster-shift-than-shuffle",
def TuningPreferPerEltVectorShift : SubtargetFeature<"tuning-fast-per-element-vector-shift",
"PreferPerEltVectorShift", "true",
- "Vector per element shifts are faster (1/cycle latency)">;
+ "Vector per element shifts are faster">;
def TuningFastImmVectorShift : SubtargetFeature<"tuning-fast-imm-vector-shift",
"FastImmVectorShift", "true",
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b071396fef01d6a..289b2a31d17a855 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -29346,7 +29346,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
if (SDValue V = LowerShiftByScalarImmediate(Op, DAG, Subtarget))
return V;
- if (!supportedVectorVarShift(VT, Subtarget, Opc) &&
+ if (!supportedVectorVarShift(VT, Subtarget, Opc) ||
!Subtarget.preferPerEltVectorShift())
if (SDValue V = LowerShiftByScalarVariable(Op, DAG, Subtarget))
return V;
diff --git a/llvm/test/CodeGen/X86/avx2-vector-shifts.ll b/llvm/test/CodeGen/X86/avx2-vector-shifts.ll
index 6ad8106bba6d4fc..983c69d1a1c2e87 100644
--- a/llvm/test/CodeGen/X86/avx2-vector-shifts.ll
+++ b/llvm/test/CodeGen/X86/avx2-vector-shifts.ll
@@ -61,14 +61,14 @@ define <8 x i32> @test_vpslld_var(i32 %shift) {
; X86: # %bb.0:
; X86-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: vpmovzxbd {{.*#+}} ymm1 = [192,193,194,195,196,197,198,199]
-; X86-NEXT: vpsllvd %ymm0, %ymm1, %ymm0
+; X86-NEXT: vpslld %xmm0, %ymm1, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: test_vpslld_var:
; X64: # %bb.0:
; X64-NEXT: vmovd %edi, %xmm0
; X64-NEXT: vpmovzxbd {{.*#+}} ymm1 = [192,193,194,195,196,197,198,199]
-; X64-NEXT: vpsllvd %ymm0, %ymm1, %ymm0
+; X64-NEXT: vpslld %xmm0, %ymm1, %ymm0
; X64-NEXT: retq
%amt = insertelement <8 x i32> undef, i32 %shift, i32 0
%tmp = shl <8 x i32> <i32 192, i32 193, i32 194, i32 195, i32 196, i32 197, i32 198, i32 199>, %amt
diff --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll
index 577a86dff54e96b..1addedf3c3d9601 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll
@@ -992,62 +992,47 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX1-LABEL: splatvar_funnnel_v2i64:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX1-NEXT: vpsrlq $1, %xmm1, %xmm1
-; AVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: splatvar_funnnel_v2i64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq %xmm2, %xmm2
-; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX2-NEXT: vpsrlq $1, %xmm1, %xmm1
-; AVX2-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
-; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX2-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
+; AVX-LABEL: splatvar_funnnel_v2i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
+; AVX-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1
+; AVX-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
+; AVX-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vpsllq %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v2i64:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX512F-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512F-NEXT: vpsrlq $1, %xmm1, %xmm1
-; AVX512F-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
+; AVX512F-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512F-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
+; AVX512F-NEXT: vpsllq %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v2i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VL-NEXT: vpsrlq $1, %xmm1, %xmm1
-; AVX512VL-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
+; AVX512VL-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VL-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsllq %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v2i64:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512BW-NEXT: vpsrlq $1, %xmm1, %xmm1
-; AVX512BW-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
+; AVX512BW-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
+; AVX512BW-NEXT: vpsllq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
@@ -1063,13 +1048,12 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v2i64:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VLBW-NEXT: vpsrlq $1, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VLBW-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpsllq %xmm2, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
@@ -1079,28 +1063,16 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %
; AVX512VLVBMI2-NEXT: vpshldvq %xmm2, %xmm1, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
-; XOPAVX1-LABEL: splatvar_funnnel_v2i64:
-; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; XOPAVX1-NEXT: vpsrlq $1, %xmm1, %xmm1
-; XOPAVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
-; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
-; XOPAVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0
-; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
-; XOPAVX1-NEXT: retq
-;
-; XOPAVX2-LABEL: splatvar_funnnel_v2i64:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastq %xmm2, %xmm2
-; XOPAVX2-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; XOPAVX2-NEXT: vpsrlq $1, %xmm1, %xmm1
-; XOPAVX2-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
-; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
-; XOPAVX2-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
-; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
-; XOPAVX2-NEXT: retq
+; XOP-LABEL: splatvar_funnnel_v2i64:
+; XOP: # %bb.0:
+; XOP-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
+; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; XOP-NEXT: vpsrlq $1, %xmm1, %xmm1
+; XOP-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
+; XOP-NEXT: vpand %xmm3, %xmm2, %xmm2
+; XOP-NEXT: vpsllq %xmm2, %xmm0, %xmm0
+; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
;
; X86-SSE2-LABEL: splatvar_funnnel_v2i64:
; X86-SSE2: # %bb.0:
@@ -1283,16 +1255,13 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512BW-LABEL: splatvar_funnnel_v8i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vpbroadcastw %xmm2, %xmm2
-; AVX512BW-NEXT: vpsrlw $1, %xmm1, %xmm1
-; AVX512BW-NEXT: vpbroadcastw {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm3 = [15,0]
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
+; AVX512BW-NEXT: vpsrlw $1, %xmm1, %xmm1
+; AVX512BW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v8i16:
@@ -1307,13 +1276,12 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %xmm2
-; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX512VLBW-NEXT: vpmovsxbq {{.*#+}} xmm3 = [15,0]
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VLBW-NEXT: vpsrlw $1, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpsrlvw %xmm4, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VLBW-NEXT: vpsllvw %xmm2, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll
index e11f26e10b0ea69..ebcb1cb15a600ed 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll
@@ -778,49 +778,45 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %
;
; AVX2-LABEL: splatvar_funnnel_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq %xmm2, %ymm2
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
-; AVX2-NEXT: vpandn %ymm3, %ymm2, %ymm4
+; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
+; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX2-NEXT: vpsrlq $1, %ymm1, %ymm1
-; AVX2-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
-; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX2-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
+; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpsllq %xmm2, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v4i64:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastq %xmm2, %ymm2
-; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
-; AVX512F-NEXT: vpandn %ymm3, %ymm2, %ymm4
+; AVX512F-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
+; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512F-NEXT: vpsrlq $1, %ymm1, %ymm1
-; AVX512F-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
-; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX512F-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
+; AVX512F-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
+; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX512F-NEXT: vpsllq %xmm2, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v4i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq %xmm2, %ymm2
-; AVX512VL-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
-; AVX512VL-NEXT: vpandn %ymm3, %ymm2, %ymm4
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
+; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VL-NEXT: vpsrlq $1, %ymm1, %ymm1
-; AVX512VL-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
-; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX512VL-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
+; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX512VL-NEXT: vpsllq %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v4i64:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq %xmm2, %ymm2
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
-; AVX512BW-NEXT: vpandn %ymm3, %ymm2, %ymm4
+; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
+; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512BW-NEXT: vpsrlq $1, %ymm1, %ymm1
-; AVX512BW-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
-; AVX512BW-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX512BW-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
+; AVX512BW-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
+; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsllq %xmm2, %ymm0, %ymm0
; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
@@ -835,13 +831,12 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v4i64:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq %xmm2, %ymm2
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
-; AVX512VLBW-NEXT: vpandn %ymm3, %ymm2, %ymm4
+; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
+; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VLBW-NEXT: vpsrlq $1, %ymm1, %ymm1
-; AVX512VLBW-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
-; AVX512VLBW-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX512VLBW-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX512VLBW-NEXT: vpsllq %xmm2, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
@@ -871,13 +866,12 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %
;
; XOPAVX2-LABEL: splatvar_funnnel_v4i64:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastq %xmm2, %ymm2
-; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
-; XOPAVX2-NEXT: vpandn %ymm3, %ymm2, %ymm4
+; XOPAVX2-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
+; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOPAVX2-NEXT: vpsrlq $1, %ymm1, %ymm1
-; XOPAVX2-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
-; XOPAVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
-; XOPAVX2-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
+; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
+; XOPAVX2-NEXT: vpsllq %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
%splat = shufflevector <4 x i64> %amt, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -1055,14 +1049,12 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX512BW-LABEL: splatvar_funnnel_v16i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpbroadcastw %xmm2, %ymm2
+; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
+; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512BW-NEXT: vpsrlw $1, %ymm1, %ymm1
-; AVX512BW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512BW-NEXT: vpandn %ymm3, %ymm2, %ymm4
-; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
-; AVX512BW-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
+; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
@@ -1077,13 +1069,12 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %ymm2
-; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512VLBW-NEXT: vpandn %ymm3, %ymm2, %ymm4
+; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
+; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VLBW-NEXT: vpsrlw $1, %ymm1, %ymm1
-; AVX512VLBW-NEXT: vpsrlvw %ymm4, %ymm1, %ymm1
-; AVX512VLBW-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX512VLBW-NEXT: vpsllvw %ymm2, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshl-512.ll b/llvm/test/CodeGen/X86/vector-fshl-512.ll
index fe8af191d0ff406..e23855361e57a23 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-512.ll
@@ -426,37 +426,34 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt)
define <8 x i64> @splatvar_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v8i64:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastq %xmm2, %zmm2
-; AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
-; AVX512F-NEXT: vpandnq %zmm3, %zmm2, %zmm4
+; AVX512F-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
+; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512F-NEXT: vpsrlq $1, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1
-; AVX512F-NEXT: vpandq %zmm3, %zmm2, %zmm2
-; AVX512F-NEXT: vpsllvq %zmm2, %zmm0, %zmm0
+; AVX512F-NEXT: vpsrlq %xmm4, %zmm1, %zmm1
+; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX512F-NEXT: vpsllq %xmm2, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v8i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq %xmm2, %zmm2
-; AVX512VL-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
-; AVX512VL-NEXT: vpandnq %zmm3, %zmm2, %zmm4
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
+; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VL-NEXT: vpsrlq $1, %zmm1, %zmm1
-; AVX512VL-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1
-; AVX512VL-NEXT: vpandq %zmm3, %zmm2, %zmm2
-; AVX512VL-NEXT: vpsllvq %zmm2, %zmm0, %zmm0
+; AVX512VL-NEXT: vpsrlq %xmm4, %zmm1, %zmm1
+; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX512VL-NEXT: vpsllq %xmm2, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v8i64:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq %xmm2, %zmm2
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
-; AVX512BW-NEXT: vpandnq %zmm3, %zmm2, %zmm4
+; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
+; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512BW-NEXT: vpsrlq $1, %zmm1, %zmm1
-; AVX512BW-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1
-; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm2
-; AVX512BW-NEXT: vpsllvq %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlq %xmm4, %zmm1, %zmm1
+; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsllq %xmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
@@ -468,13 +465,12 @@ define <8 x i64> @splatvar_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i64:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq %xmm2, %zmm2
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
-; AVX512VLBW-NEXT: vpandnq %zmm3, %zmm2, %zmm4
+; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
+; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VLBW-NEXT: vpsrlq $1, %zmm1, %zmm1
-; AVX512VLBW-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1
-; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm2
-; AVX512VLBW-NEXT: vpsllvq %zmm2, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpsrlq %xmm4, %zmm1, %zmm1
+; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX512VLBW-NEXT: vpsllq %xmm2, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
@@ -584,13 +580,12 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
;
; AVX512BW-LABEL: splatvar_funnnel_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastw %xmm2, %zmm2
-; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512BW-NEXT: vpandnq %zmm3, %zmm2, %zmm4
+; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
+; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512BW-NEXT: vpsrlw $1, %zmm1, %zmm1
-; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
-; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm2
-; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
+; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
@@ -602,13 +597,12 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %zmm2
-; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512VLBW-NEXT: vpandnq %zmm3, %zmm2, %zmm4
+; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
+; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VLBW-NEXT: vpsrlw $1, %zmm1, %zmm1
-; AVX512VLBW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
-; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm2
-; AVX512VLBW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
+; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
index 73a81614f9a9374..6b5ba7042c5c51d 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
@@ -750,30 +750,17 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind
; SSE41-NEXT: por %xmm4, %xmm0
; SSE41-NEXT: retq
;
-; AVX1-LABEL: splatvar_funnnel_v2i64:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm2 = [63,63]
-; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX1-NEXT: vpsllq %xmm3, %xmm0, %xmm3
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpsubq %xmm1, %xmm4, %xmm1
-; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: splatvar_funnnel_v2i64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
-; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm2 = [63,63]
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX2-NEXT: vpsllvq %xmm3, %xmm0, %xmm3
-; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX2-NEXT: vpsubq %xmm1, %xmm4, %xmm1
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpor %xmm0, %xmm3, %xmm0
-; AVX2-NEXT: retq
+; AVX-LABEL: splatvar_funnnel_v2i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmovsxbq {{.*#+}} xmm2 = [63,63]
+; AVX-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX-NEXT: vpsllq %xmm3, %xmm0, %xmm3
+; AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX-NEXT: vpsubq %xmm1, %xmm4, %xmm1
+; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpor %xmm0, %xmm3, %xmm0
+; AVX-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v2i64:
; AVX512F: # %bb.0:
@@ -1003,27 +990,23 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind
;
; AVX512BW-LABEL: splatvar_funnnel_v8i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512BW-NEXT: vpsrlw $1, %xmm0, %xmm2
-; AVX512BW-NEXT: vpbroadcastw {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
-; AVX512BW-NEXT: vpandn %xmm3, %xmm1, %xmm4
-; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm2, %zmm2
-; AVX512BW-NEXT: vpand %xmm3, %xmm1, %xmm1
-; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpor %xmm2, %xmm0, %xmm0
-; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm2 = [15,0]
+; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512BW-NEXT: vpsrlw $1, %xmm0, %xmm4
+; AVX512BW-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
+; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512BW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15]
+; AVX512VLBW-NEXT: vpmovsxbq {{.*#+}} xmm2 = [15,0]
; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VLBW-NEXT: vpsrlw $1, %xmm0, %xmm4
-; AVX512VLBW-NEXT: vpsrlvw %xmm3, %xmm4, %xmm3
+; AVX512VLBW-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpsllvw %xmm1, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
index 8c3e27aaba1fb1c..731a9f4e4037306 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
@@ -587,14 +587,13 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind
;
; AVX2-LABEL: splatvar_funnnel_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [63,63,63,63]
-; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm3
-; AVX2-NEXT: vpsllvq %ymm3, %ymm0, %ymm3
+; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm2 = [63,63]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX2-NEXT: vpsllq %xmm3, %ymm0, %ymm3
; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX2-NEXT: vpsubq %ymm1, %ymm4, %ymm1
-; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
-; AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsubq %xmm1, %xmm4, %xmm1
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm0, %ymm3, %ymm0
; AVX2-NEXT: retq
;
@@ -807,26 +806,23 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounw
;
; AVX512BW-LABEL: splatvar_funnnel_v16i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %ymm1
-; AVX512BW-NEXT: vpsrlw $1, %ymm0, %ymm2
-; AVX512BW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512BW-NEXT: vpandn %ymm3, %ymm1, %ymm4
-; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm2, %zmm2
-; AVX512BW-NEXT: vpand %ymm3, %ymm1, %ymm1
-; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpor %ymm2, %ymm0, %ymm0
+; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
+; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512BW-NEXT: vpsrlw $1, %ymm0, %ymm4
+; AVX512BW-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
+; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %ymm1
-; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512VLBW-NEXT: vpandn %ymm2, %ymm1, %ymm3
+; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
+; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VLBW-NEXT: vpsrlw $1, %ymm0, %ymm4
-; AVX512VLBW-NEXT: vpsrlvw %ymm3, %ymm4, %ymm3
-; AVX512VLBW-NEXT: vpand %ymm2, %ymm1, %ymm1
-; AVX512VLBW-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
+; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
index 85a43ea02034d57..edfa56a70d59e92 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
@@ -334,25 +334,23 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw
;
; AVX512BW-LABEL: splatvar_funnnel_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %zmm1
-; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512BW-NEXT: vpandnq %zmm2, %zmm1, %zmm3
+; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
+; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm4
-; AVX512BW-NEXT: vpsrlvw %zmm3, %zmm4, %zmm3
-; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm1
-; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlw %xmm3, %zmm4, %zmm3
+; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %zmm1
-; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512VLBW-NEXT: vpandnq %zmm2, %zmm1, %zmm3
+; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
+; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VLBW-NEXT: vpsrlw $1, %zmm0, %zmm4
-; AVX512VLBW-NEXT: vpsrlvw %zmm3, %zmm4, %zmm3
-; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm1
-; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm4, %zmm3
+; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll
index 56333a69c3e694e..638a3cdaa2c1d26 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll
@@ -1113,62 +1113,47 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX1-LABEL: splatvar_funnnel_v2i64:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: splatvar_funnnel_v2i64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq %xmm2, %xmm2
-; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX2-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
-; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX2-NEXT: vpaddq %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
+; AVX-LABEL: splatvar_funnnel_v2i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
+; AVX-NEXT: vpand %xmm3, %xmm2, %xmm4
+; AVX-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
+; AVX-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vpaddq %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpsllq %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v2i64:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX512F-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512F-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
+; AVX512F-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512F-NEXT: vpaddq %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
+; AVX512F-NEXT: vpsllq %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v2i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VL-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
+; AVX512VL-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512VL-NEXT: vpaddq %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsllq %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v2i64:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512BW-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
+; AVX512BW-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT: vpaddq %xmm0, %xmm0, %xmm0
-; AVX512BW-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
+; AVX512BW-NEXT: vpsllq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
@@ -1184,13 +1169,12 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v2i64:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VLBW-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpaddq %xmm0, %xmm0, %xmm0
-; AVX512VLBW-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpsllq %xmm2, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
@@ -1201,28 +1185,16 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %
; AVX512VLVBMI2-NEXT: vmovdqa %xmm1, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
-; XOPAVX1-LABEL: splatvar_funnnel_v2i64:
-; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
-; XOPAVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
-; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; XOPAVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0
-; XOPAVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0
-; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
-; XOPAVX1-NEXT: retq
-;
-; XOPAVX2-LABEL: splatvar_funnnel_v2i64:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastq %xmm2, %xmm2
-; XOPAVX2-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
-; XOPAVX2-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
-; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; XOPAVX2-NEXT: vpaddq %xmm0, %xmm0, %xmm0
-; XOPAVX2-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
-; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
-; XOPAVX2-NEXT: retq
+; XOP-LABEL: splatvar_funnnel_v2i64:
+; XOP: # %bb.0:
+; XOP-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
+; XOP-NEXT: vpand %xmm3, %xmm2, %xmm4
+; XOP-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
+; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; XOP-NEXT: vpaddq %xmm0, %xmm0, %xmm0
+; XOP-NEXT: vpsllq %xmm2, %xmm0, %xmm0
+; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
;
; X86-SSE2-LABEL: splatvar_funnnel_v2i64:
; X86-SSE2: # %bb.0:
@@ -1408,16 +1380,13 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512BW-LABEL: splatvar_funnnel_v8i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
-; AVX512BW-NEXT: vpbroadcastw %xmm2, %xmm2
-; AVX512BW-NEXT: vpbroadcastw {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm3 = [15,0]
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
+; AVX512BW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT: vpaddw %xmm0, %xmm0, %xmm0
-; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v8i16:
@@ -1432,13 +1401,12 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %xmm2
-; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX512VLBW-NEXT: vpmovsxbq {{.*#+}} xmm3 = [15,0]
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VLBW-NEXT: vpsrlvw %xmm4, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpaddw %xmm0, %xmm0, %xmm0
-; AVX512VLBW-NEXT: vpsllvw %xmm2, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll
index 20143d1f9fe66d7..3fabf720da71c3b 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll
@@ -808,49 +808,45 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %
;
; AVX2-LABEL: splatvar_funnnel_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq %xmm2, %ymm2
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
-; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm4
-; AVX2-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
-; AVX2-NEXT: vpandn %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
+; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
+; AVX2-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
+; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vpaddq %ymm0, %ymm0, %ymm0
-; AVX2-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsllq %xmm2, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v4i64:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastq %xmm2, %ymm2
-; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
-; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm4
-; AVX512F-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
-; AVX512F-NEXT: vpandn %ymm3, %ymm2, %ymm2
+; AVX512F-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
+; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
+; AVX512F-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
+; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512F-NEXT: vpaddq %ymm0, %ymm0, %ymm0
-; AVX512F-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
+; AVX512F-NEXT: vpsllq %xmm2, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v4i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq %xmm2, %ymm2
-; AVX512VL-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
-; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm4
-; AVX512VL-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
-; AVX512VL-NEXT: vpandn %ymm3, %ymm2, %ymm2
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
+; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
+; AVX512VL-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
+; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512VL-NEXT: vpaddq %ymm0, %ymm0, %ymm0
-; AVX512VL-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vpsllq %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v4i64:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq %xmm2, %ymm2
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
-; AVX512BW-NEXT: vpand %ymm3, %ymm2, %ymm4
-; AVX512BW-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
-; AVX512BW-NEXT: vpandn %ymm3, %ymm2, %ymm2
+; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
+; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
+; AVX512BW-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
+; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT: vpaddq %ymm0, %ymm0, %ymm0
-; AVX512BW-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
+; AVX512BW-NEXT: vpsllq %xmm2, %ymm0, %ymm0
; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
@@ -865,13 +861,12 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v4i64:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq %xmm2, %ymm2
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
-; AVX512VLBW-NEXT: vpand %ymm3, %ymm2, %ymm4
-; AVX512VLBW-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
-; AVX512VLBW-NEXT: vpandn %ymm3, %ymm2, %ymm2
+; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
+; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
+; AVX512VLBW-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpaddq %ymm0, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpsllq %xmm2, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
@@ -902,13 +897,12 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %
;
; XOPAVX2-LABEL: splatvar_funnnel_v4i64:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastq %xmm2, %ymm2
-; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
-; XOPAVX2-NEXT: vpand %ymm3, %ymm2, %ymm4
-; XOPAVX2-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
-; XOPAVX2-NEXT: vpandn %ymm3, %ymm2, %ymm2
+; XOPAVX2-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
+; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
+; XOPAVX2-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
+; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT: vpaddq %ymm0, %ymm0, %ymm0
-; XOPAVX2-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpsllq %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
%splat = shufflevector <4 x i64> %amt, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -1087,14 +1081,12 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX512BW-LABEL: splatvar_funnnel_v16i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
-; AVX512BW-NEXT: vpbroadcastw %xmm2, %ymm2
-; AVX512BW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512BW-NEXT: vpand %ymm3, %ymm2, %ymm4
-; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
+; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
+; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
+; AVX512BW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
+; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT: vpaddw %ymm0, %ymm0, %ymm0
-; AVX512BW-NEXT: vpandn %ymm3, %ymm2, %ymm2
-; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
@@ -1109,13 +1101,12 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %ymm2
-; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512VLBW-NEXT: vpand %ymm3, %ymm2, %ymm4
-; AVX512VLBW-NEXT: vpsrlvw %ymm4, %ymm1, %ymm1
-; AVX512VLBW-NEXT: vpandn %ymm3, %ymm2, %ymm2
+; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
+; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
+; AVX512VLBW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpaddw %ymm0, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpsllvw %ymm2, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshr-512.ll b/llvm/test/CodeGen/X86/vector-fshr-512.ll
index 259e5a5f4711549..91dd83050e17e61 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-512.ll
@@ -424,37 +424,34 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt)
define <8 x i64> @splatvar_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v8i64:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastq %xmm2, %zmm2
-; AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
-; AVX512F-NEXT: vpandq %zmm3, %zmm2, %zmm4
-; AVX512F-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1
-; AVX512F-NEXT: vpandnq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
+; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
+; AVX512F-NEXT: vpsrlq %xmm4, %zmm1, %zmm1
+; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512F-NEXT: vpaddq %zmm0, %zmm0, %zmm0
-; AVX512F-NEXT: vpsllvq %zmm2, %zmm0, %zmm0
+; AVX512F-NEXT: vpsllq %xmm2, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v8i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq %xmm2, %zmm2
-; AVX512VL-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
-; AVX512VL-NEXT: vpandq %zmm3, %zmm2, %zmm4
-; AVX512VL-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1
-; AVX512VL-NEXT: vpandnq %zmm3, %zmm2, %zmm2
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
+; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
+; AVX512VL-NEXT: vpsrlq %xmm4, %zmm1, %zmm1
+; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512VL-NEXT: vpaddq %zmm0, %zmm0, %zmm0
-; AVX512VL-NEXT: vpsllvq %zmm2, %zmm0, %zmm0
+; AVX512VL-NEXT: vpsllq %xmm2, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v8i64:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq %xmm2, %zmm2
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
-; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm4
-; AVX512BW-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1
-; AVX512BW-NEXT: vpandnq %zmm3, %zmm2, %zmm2
+; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
+; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
+; AVX512BW-NEXT: vpsrlq %xmm4, %zmm1, %zmm1
+; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT: vpaddq %zmm0, %zmm0, %zmm0
-; AVX512BW-NEXT: vpsllvq %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsllq %xmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
@@ -467,13 +464,12 @@ define <8 x i64> @splatvar_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i64:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq %xmm2, %zmm2
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
-; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm4
-; AVX512VLBW-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1
-; AVX512VLBW-NEXT: vpandnq %zmm3, %zmm2, %zmm2
+; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
+; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
+; AVX512VLBW-NEXT: vpsrlq %xmm4, %zmm1, %zmm1
+; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpaddq %zmm0, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpsllvq %zmm2, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpsllq %xmm2, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
@@ -586,13 +582,12 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
;
; AVX512BW-LABEL: splatvar_funnnel_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastw %xmm2, %zmm2
-; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm4
-; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
-; AVX512BW-NEXT: vpandnq %zmm3, %zmm2, %zmm2
+; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
+; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
+; AVX512BW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
+; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT: vpaddw %zmm0, %zmm0, %zmm0
-; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
@@ -605,13 +600,12 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %zmm2
-; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm4
-; AVX512VLBW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
-; AVX512VLBW-NEXT: vpandnq %zmm3, %zmm2, %zmm2
+; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
+; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
+; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
+; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpaddw %zmm0, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
index 50603fa03e9b23f..01578d399b774f5 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
@@ -777,30 +777,17 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind
; SSE41-NEXT: por %xmm4, %xmm0
; SSE41-NEXT: retq
;
-; AVX1-LABEL: splatvar_funnnel_v2i64:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm2 = [63,63]
-; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX1-NEXT: vpsrlq %xmm3, %xmm0, %xmm3
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpsubq %xmm1, %xmm4, %xmm1
-; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: splatvar_funnnel_v2i64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
-; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm2 = [63,63]
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX2-NEXT: vpsrlvq %xmm3, %xmm0, %xmm3
-; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX2-NEXT: vpsubq %xmm1, %xmm4, %xmm1
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpor %xmm0, %xmm3, %xmm0
-; AVX2-NEXT: retq
+; AVX-LABEL: splatvar_funnnel_v2i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmovsxbq {{.*#+}} xmm2 = [63,63]
+; AVX-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX-NEXT: vpsrlq %xmm3, %xmm0, %xmm3
+; AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX-NEXT: vpsubq %xmm1, %xmm4, %xmm1
+; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpor %xmm0, %xmm3, %xmm0
+; AVX-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v2i64:
; AVX512F: # %bb.0:
@@ -1040,27 +1027,23 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind
;
; AVX512BW-LABEL: splatvar_funnnel_v8i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512BW-NEXT: vpbroadcastw {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm2 = [15,0]
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512BW-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3
+; AVX512BW-NEXT: vpsrlw %xmm3, %xmm0, %xmm3
; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpaddw %xmm0, %xmm0, %xmm0
-; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpor %xmm3, %xmm0, %xmm0
-; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15]
+; AVX512VLBW-NEXT: vpmovsxbq {{.*#+}} xmm2 = [15,0]
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VLBW-NEXT: vpsrlvw %xmm3, %xmm0, %xmm3
+; AVX512VLBW-NEXT: vpsrlw %xmm3, %xmm0, %xmm3
; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpaddw %xmm0, %xmm0, %xmm0
-; AVX512VLBW-NEXT: vpsllvw %xmm1, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
index 36872bc144864fc..0aa91b74e12cabc 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
@@ -618,14 +618,13 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind
;
; AVX2-LABEL: splatvar_funnnel_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [63,63,63,63]
-; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm3
-; AVX2-NEXT: vpsrlvq %ymm3, %ymm0, %ymm3
+; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm2 = [63,63]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX2-NEXT: vpsrlq %xmm3, %ymm0, %ymm3
; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX2-NEXT: vpsubq %ymm1, %ymm4, %ymm1
-; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
-; AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsubq %xmm1, %xmm4, %xmm1
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm0, %ymm3, %ymm0
; AVX2-NEXT: retq
;
@@ -846,26 +845,23 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounw
;
; AVX512BW-LABEL: splatvar_funnnel_v16i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %ymm1
-; AVX512BW-NEXT: vpbroadcastw {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512BW-NEXT: vpand %ymm2, %ymm1, %ymm3
-; AVX512BW-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3
+; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
+; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX512BW-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
+; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpaddw %ymm0, %ymm0, %ymm0
-; AVX512BW-NEXT: vpandn %ymm2, %ymm1, %ymm1
-; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %ymm1
-; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512VLBW-NEXT: vpand %ymm2, %ymm1, %ymm3
-; AVX512VLBW-NEXT: vpsrlvw %ymm3, %ymm0, %ymm3
-; AVX512VLBW-NEXT: vpandn %ymm2, %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
+; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX512VLBW-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
+; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpaddw %ymm0, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
index d262a866bf6f438..4364c047dfdebfc 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
@@ -332,25 +332,23 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw
;
; AVX512BW-LABEL: splatvar_funnnel_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %zmm1
-; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm3
-; AVX512BW-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3
-; AVX512BW-NEXT: vpandnq %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
+; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX512BW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3
+; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpaddw %zmm0, %zmm0, %zmm0
-; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %zmm1
-; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm3
-; AVX512VLBW-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3
-; AVX512VLBW-NEXT: vpandnq %zmm2, %zmm1, %zmm1
+; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
+; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3
+; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpaddw %zmm0, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll
index 20797cd22d57372..2e21f8d0aa32a1d 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-128.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll
@@ -680,24 +680,14 @@ define <2 x i64> @splatvar_rotate_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE41-NEXT: por %xmm3, %xmm0
; SSE41-NEXT: retq
;
-; AVX1-LABEL: splatvar_rotate_v2i64:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm2 = [64,64]
-; AVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm2
-; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vpsrlq %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: splatvar_rotate_v2i64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
-; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm2 = [64,64]
-; AVX2-NEXT: vpsubq %xmm1, %xmm2, %xmm2
-; AVX2-NEXT: vpsllvq %xmm1, %xmm0, %xmm1
-; AVX2-NEXT: vpsrlvq %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: retq
+; AVX-LABEL: splatvar_rotate_v2i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmovsxbq {{.*#+}} xmm2 = [64,64]
+; AVX-NEXT: vpsubq %xmm1, %xmm2, %xmm2
+; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm1
+; AVX-NEXT: vpsrlq %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
;
; AVX512NOVLX-LABEL: splatvar_rotate_v2i64:
; AVX512NOVLX: # %bb.0:
@@ -870,27 +860,23 @@ define <8 x i16> @splatvar_rotate_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
;
; AVX512BW-LABEL: splatvar_rotate_v8i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512BW-NEXT: vpsrlw $1, %xmm0, %xmm2
-; AVX512BW-NEXT: vpbroadcastw {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
-; AVX512BW-NEXT: vpandn %xmm3, %xmm1, %xmm4
-; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm2, %zmm2
-; AVX512BW-NEXT: vpand %xmm3, %xmm1, %xmm1
-; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpor %xmm2, %xmm0, %xmm0
-; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm2 = [15,0]
+; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512BW-NEXT: vpsrlw $1, %xmm0, %xmm4
+; AVX512BW-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
+; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512BW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_rotate_v8i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15]
+; AVX512VLBW-NEXT: vpmovsxbq {{.*#+}} xmm2 = [15,0]
; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VLBW-NEXT: vpsrlw $1, %xmm0, %xmm4
-; AVX512VLBW-NEXT: vpsrlvw %xmm3, %xmm4, %xmm3
+; AVX512VLBW-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpsllvw %xmm1, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll
index a2a01a2777f7e4c..b5f0522327a4482 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-256.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll
@@ -531,12 +531,11 @@ define <4 x i64> @splatvar_rotate_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; AVX2-LABEL: splatvar_rotate_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [64,64,64,64]
-; AVX2-NEXT: vpsubq %ymm1, %ymm2, %ymm2
-; AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm1
-; AVX2-NEXT: vpsrlvq %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm2
+; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm3 = [64,64]
+; AVX2-NEXT: vpsubq %xmm1, %xmm3, %xmm1
+; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX2-NEXT: retq
;
; AVX512NOVLX-LABEL: splatvar_rotate_v4i64:
@@ -698,26 +697,23 @@ define <16 x i16> @splatvar_rotate_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
;
; AVX512BW-LABEL: splatvar_rotate_v16i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %ymm1
-; AVX512BW-NEXT: vpsrlw $1, %ymm0, %ymm2
-; AVX512BW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512BW-NEXT: vpandn %ymm3, %ymm1, %ymm4
-; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm2, %zmm2
-; AVX512BW-NEXT: vpand %ymm3, %ymm1, %ymm1
-; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpor %ymm2, %ymm0, %ymm0
+; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
+; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512BW-NEXT: vpsrlw $1, %ymm0, %ymm4
+; AVX512BW-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
+; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_rotate_v16i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %ymm1
-; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512VLBW-NEXT: vpandn %ymm2, %ymm1, %ymm3
+; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
+; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VLBW-NEXT: vpsrlw $1, %ymm0, %ymm4
-; AVX512VLBW-NEXT: vpsrlvw %ymm3, %ymm4, %ymm3
-; AVX512VLBW-NEXT: vpand %ymm2, %ymm1, %ymm1
-; AVX512VLBW-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
+; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-rotate-512.ll b/llvm/test/CodeGen/X86/vector-rotate-512.ll
index c8a5db70af8bd47..29afbf4c62ef5ad 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-512.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-512.ll
@@ -347,25 +347,23 @@ define <32 x i16> @splatvar_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind
;
; AVX512BW-LABEL: splatvar_rotate_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %zmm1
-; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512BW-NEXT: vpandnq %zmm2, %zmm1, %zmm3
+; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
+; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm4
-; AVX512BW-NEXT: vpsrlvw %zmm3, %zmm4, %zmm3
-; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm1
-; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlw %xmm3, %zmm4, %zmm3
+; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_rotate_v32i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %zmm1
-; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512VLBW-NEXT: vpandnq %zmm2, %zmm1, %zmm3
+; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
+; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VLBW-NEXT: vpsrlw $1, %zmm0, %zmm4
-; AVX512VLBW-NEXT: vpsrlvw %zmm3, %zmm4, %zmm3
-; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm1
-; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm4, %zmm3
+; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
index 006b034ec1674a1..53b6aca3e9fcba9 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
@@ -645,10 +645,9 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
;
; AVX2-LABEL: splatvar_shift_v2i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm2
-; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsubq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: retq
@@ -672,16 +671,14 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; AVX512-LABEL: splatvar_shift_v2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512-NEXT: vpbroadcastq %xmm1, %xmm1
-; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpsraq %xmm1, %zmm0, %zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v2i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm1
-; AVX512VL-NEXT: vpsravq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsraq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v2i64:
@@ -711,40 +708,28 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE41-NEXT: psrad %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX1-LABEL: splatvar_shift_v4i32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: splatvar_shift_v4i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX2-NEXT: vpsravd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; XOPAVX1-LABEL: splatvar_shift_v4i32:
-; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; XOPAVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
-; XOPAVX1-NEXT: retq
+; AVX-LABEL: splatvar_shift_v4i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
;
-; XOPAVX2-LABEL: splatvar_shift_v4i32:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
-; XOPAVX2-NEXT: vpsravd %xmm1, %xmm0, %xmm0
-; XOPAVX2-NEXT: retq
+; XOP-LABEL: splatvar_shift_v4i32:
+; XOP: # %bb.0:
+; XOP-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOP-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v4i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX512-NEXT: vpsravd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v4i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX512VL-NEXT: vpsravd %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512VL-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v4i32:
@@ -784,32 +769,17 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; XOP-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512DQ-LABEL: splatvar_shift_v8i16:
-; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512DQ-NEXT: vpsraw %xmm1, %xmm0, %xmm0
-; AVX512DQ-NEXT: retq
-;
-; AVX512BW-LABEL: splatvar_shift_v8i16:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512DQVL-LABEL: splatvar_shift_v8i16:
-; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512DQVL-NEXT: vpsraw %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT: retq
+; AVX512-LABEL: splatvar_shift_v8i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
;
-; AVX512BWVL-LABEL: splatvar_shift_v8i16:
-; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpsravw %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT: retq
+; AVX512VL-LABEL: splatvar_shift_v8i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v8i16:
; X86-SSE: # %bb.0:
@@ -902,20 +872,18 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v16i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512DQ-NEXT: vpsravd %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpsrad %xmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v16i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -923,20 +891,18 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v16i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQVL-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpsravd %zmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpsrad %xmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v16i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BWVL-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512BWVL-NEXT: vpsravw %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -991,11 +957,10 @@ define <2 x i64> @splatvar_modulo_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwi
;
; AVX2-LABEL: splatvar_modulo_shift_v2i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm2
-; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsubq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: retq
@@ -1021,18 +986,16 @@ define <2 x i64> @splatvar_modulo_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwi
; AVX512-LABEL: splatvar_modulo_shift_v2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpsraq %xmm1, %zmm0, %zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v2i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
-; AVX512VL-NEXT: vpsravq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsraq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_modulo_shift_v2i64:
@@ -1057,47 +1020,28 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
; SSE-NEXT: psrad %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: splatvar_modulo_shift_v4i32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: splatvar_modulo_shift_v4i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpsravd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; XOPAVX1-LABEL: splatvar_modulo_shift_v4i32:
-; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
-; XOPAVX1-NEXT: retq
+; AVX-LABEL: splatvar_modulo_shift_v4i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
;
-; XOPAVX2-LABEL: splatvar_modulo_shift_v4i32:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
-; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; XOPAVX2-NEXT: vpsravd %xmm1, %xmm0, %xmm0
-; XOPAVX2-NEXT: retq
+; XOP-LABEL: splatvar_modulo_shift_v4i32:
+; XOP: # %bb.0:
+; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOP-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v4i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512-NEXT: vpsravd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v4i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
-; AVX512VL-NEXT: vpsravd %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_modulo_shift_v4i32:
@@ -1130,34 +1074,17 @@ define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwi
; XOP-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512DQ-LABEL: splatvar_modulo_shift_v8i16:
-; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512DQ-NEXT: vpsraw %xmm1, %xmm0, %xmm0
-; AVX512DQ-NEXT: retq
-;
-; AVX512BW-LABEL: splatvar_modulo_shift_v8i16:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512DQVL-LABEL: splatvar_modulo_shift_v8i16:
-; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512DQVL-NEXT: vpsraw %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT: retq
+; AVX512-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
;
-; AVX512BWVL-LABEL: splatvar_modulo_shift_v8i16:
-; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpsravw %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT: retq
+; AVX512VL-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_modulo_shift_v8i16:
; X86-SSE: # %bb.0:
@@ -1251,22 +1178,20 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
;
; AVX512DQ-LABEL: splatvar_modulo_shift_v16i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512DQ-NEXT: vpsravd %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpsrad %xmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_modulo_shift_v16i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -1274,22 +1199,20 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
;
; AVX512DQVL-LABEL: splatvar_modulo_shift_v16i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpsravd %zmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpsrad %xmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_modulo_shift_v16i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512BWVL-NEXT: vpsravw %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -1892,10 +1815,9 @@ define <2 x i64> @PR52719(<2 x i64> %a0, i32 %a1) {
; AVX2-LABEL: PR52719:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm1
-; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm2
-; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsubq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: retq
@@ -1922,18 +1844,15 @@ define <2 x i64> @PR52719(<2 x i64> %a0, i32 %a1) {
; AVX512: # %bb.0:
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512-NEXT: vmovd %edi, %xmm1
-; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpsraq %xmm1, %zmm0, %zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: PR52719:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastd %edi, %xmm1
-; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512VL-NEXT: vpsravq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovd %edi, %xmm1
+; AVX512VL-NEXT: vpsraq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: PR52719:
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
index 186e3dc12c5137a..9a483c345f92cb1 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -670,10 +670,9 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; AVX2-LABEL: splatvar_shift_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
-; AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
+; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
@@ -691,10 +690,9 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; XOPAVX2-LABEL: splatvar_shift_v4i64:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastq %xmm1, %ymm1
; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
-; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
+; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; XOPAVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
@@ -702,15 +700,13 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX512-LABEL: splatvar_shift_v4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512-NEXT: vpbroadcastq %xmm1, %ymm1
-; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpsraq %xmm1, %zmm0, %zmm0
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v4i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq %xmm1, %ymm1
-; AVX512VL-NEXT: vpsravq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpsraq %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_shift_v4i64:
@@ -730,10 +726,9 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; X86-AVX2-LABEL: splatvar_shift_v4i64:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
-; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
-; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
+; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
@@ -754,8 +749,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; AVX2-LABEL: splatvar_shift_v8i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
-; AVX2-NEXT: vpsravd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v8i32:
@@ -769,20 +764,20 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; XOPAVX2-LABEL: splatvar_shift_v8i32:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastd %xmm1, %ymm1
-; XOPAVX2-NEXT: vpsravd %ymm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v8i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastd %xmm1, %ymm1
-; AVX512-NEXT: vpsravd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512-NEXT: vpsrad %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v8i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastd %xmm1, %ymm1
-; AVX512VL-NEXT: vpsravd %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512VL-NEXT: vpsrad %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_shift_v8i32:
@@ -796,8 +791,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; X86-AVX2-LABEL: splatvar_shift_v8i32:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
-; X86-AVX2-NEXT: vpsravd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X86-AVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
%shift = ashr <8 x i32> %a, %splat
@@ -835,31 +830,17 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
; XOPAVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
-; AVX512DQ-LABEL: splatvar_shift_v16i16:
-; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512DQ-NEXT: vpsraw %xmm1, %ymm0, %ymm0
-; AVX512DQ-NEXT: retq
-;
-; AVX512BW-LABEL: splatvar_shift_v16i16:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %ymm1
-; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
-; AVX512BW-NEXT: retq
-;
-; AVX512DQVL-LABEL: splatvar_shift_v16i16:
-; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512DQVL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
-; AVX512DQVL-NEXT: retq
+; AVX512-LABEL: splatvar_shift_v16i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
;
-; AVX512BWVL-LABEL: splatvar_shift_v16i16:
-; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %ymm1
-; AVX512BWVL-NEXT: vpsravw %ymm1, %ymm0, %ymm0
-; AVX512BWVL-NEXT: retq
+; AVX512VL-LABEL: splatvar_shift_v16i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_shift_v16i16:
; X86-AVX1: # %bb.0:
@@ -955,10 +936,9 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
;
; AVX512BW-LABEL: splatvar_shift_v32i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0
-; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
-; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
;
@@ -978,10 +958,9 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
;
; AVX512BWVL-LABEL: splatvar_shift_v32i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512BWVL-NEXT: vpmovsxbw %ymm0, %zmm0
-; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
-; AVX512BWVL-NEXT: vpsravw %zmm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpsraw %xmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT: retq
;
@@ -1047,12 +1026,10 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
;
; AVX2-LABEL: splatvar_modulo_shift_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
-; AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
+; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
@@ -1071,12 +1048,10 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
;
; XOPAVX2-LABEL: splatvar_modulo_shift_v4i64:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
-; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; XOPAVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
-; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
+; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; XOPAVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
@@ -1084,18 +1059,15 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
; AVX512-LABEL: splatvar_modulo_shift_v4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
-; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512-NEXT: vpbroadcastq %xmm1, %ymm1
-; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpsraq %xmm1, %zmm0, %zmm0
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
-; AVX512VL-NEXT: vpbroadcastq %xmm1, %ymm1
-; AVX512VL-NEXT: vpsravq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpsraq %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v4i64:
@@ -1117,10 +1089,9 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
; X86-AVX2-LABEL: splatvar_modulo_shift_v4i64:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
-; X86-AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
-; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
-; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
+; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
@@ -1142,10 +1113,8 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; AVX2-LABEL: splatvar_modulo_shift_v8i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
-; AVX2-NEXT: vpsravd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v8i32:
@@ -1159,25 +1128,20 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; XOPAVX2-LABEL: splatvar_modulo_shift_v8i32:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; XOPAVX2-NEXT: vpbroadcastd %xmm1, %ymm1
-; XOPAVX2-NEXT: vpsravd %ymm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v8i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512-NEXT: vpbroadcastd %xmm1, %ymm1
-; AVX512-NEXT: vpsravd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpsrad %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v8i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
-; AVX512VL-NEXT: vpbroadcastd %xmm1, %ymm1
-; AVX512VL-NEXT: vpsravd %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpsrad %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v8i32:
@@ -1191,10 +1155,8 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; X86-AVX2-LABEL: splatvar_modulo_shift_v8i32:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; X86-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; X86-AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
-; X86-AVX2-NEXT: vpsravd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
+; X86-AVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
%splat = shufflevector <8 x i32> %mod, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -1233,33 +1195,17 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; XOPAVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
-; AVX512DQ-LABEL: splatvar_modulo_shift_v16i16:
-; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512DQ-NEXT: vpsraw %xmm1, %ymm0, %ymm0
-; AVX512DQ-NEXT: retq
-;
-; AVX512BW-LABEL: splatvar_modulo_shift_v16i16:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %ymm1
-; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
-; AVX512BW-NEXT: retq
-;
-; AVX512DQVL-LABEL: splatvar_modulo_shift_v16i16:
-; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512DQVL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
-; AVX512DQVL-NEXT: retq
+; AVX512-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
;
-; AVX512BWVL-LABEL: splatvar_modulo_shift_v16i16:
-; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %ymm1
-; AVX512BWVL-NEXT: vpsravw %ymm1, %ymm0, %ymm0
-; AVX512BWVL-NEXT: retq
+; AVX512VL-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v16i16:
; X86-AVX1: # %bb.0:
@@ -1358,11 +1304,10 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
;
; AVX512BW-LABEL: splatvar_modulo_shift_v32i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0
-; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
-; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
;
@@ -1382,11 +1327,10 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
;
; AVX512BWVL-LABEL: splatvar_modulo_shift_v32i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512BWVL-NEXT: vpmovsxbw %ymm0, %zmm0
-; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
-; AVX512BWVL-NEXT: vpsravw %zmm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpsraw %xmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT: retq
;
@@ -2190,11 +2134,9 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
; AVX2-LABEL: PR52719:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm1
-; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
-; AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
+; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
@@ -2217,11 +2159,9 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
; XOPAVX2-LABEL: PR52719:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vmovd %edi, %xmm1
-; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
-; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
-; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
+; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; XOPAVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
@@ -2230,17 +2170,14 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
; AVX512: # %bb.0:
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vmovd %edi, %xmm1
-; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
-; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpsraq %xmm1, %zmm0, %zmm0
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: PR52719:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastd %edi, %xmm1
-; AVX512VL-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
-; AVX512VL-NEXT: vpsravq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vmovd %edi, %xmm1
+; AVX512VL-NEXT: vpsraq %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: PR52719:
@@ -2261,11 +2198,10 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
;
; X86-AVX2-LABEL: PR52719:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %xmm1
-; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; X86-AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
-; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
-; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
+; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
index 796c05d4d75ec62..a2fe36e72f6b9c9 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
@@ -148,8 +148,7 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_shift_v8i64:
; ALL: # %bb.0:
-; ALL-NEXT: vpbroadcastq %xmm1, %zmm1
-; ALL-NEXT: vpsravq %zmm1, %zmm0, %zmm0
+; ALL-NEXT: vpsraq %xmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
%shift = ashr <8 x i64> %a, %splat
@@ -159,8 +158,8 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_shift_v16i32:
; ALL: # %bb.0:
-; ALL-NEXT: vpbroadcastd %xmm1, %zmm1
-; ALL-NEXT: vpsravd %zmm1, %zmm0, %zmm0
+; ALL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; ALL-NEXT: vpsrad %xmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
%shift = ashr <16 x i32> %a, %splat
@@ -179,8 +178,8 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind
;
; AVX512BW-LABEL: splatvar_shift_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %zmm1
-; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
%shift = ashr <32 x i16> %a, %splat
@@ -234,10 +233,8 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_modulo_shift_v8i64:
; ALL: # %bb.0:
-; ALL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
-; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
-; ALL-NEXT: vpbroadcastq %xmm1, %zmm1
-; ALL-NEXT: vpsravq %zmm1, %zmm0, %zmm0
+; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; ALL-NEXT: vpsraq %xmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%mod = and <8 x i64> %b, <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>
%splat = shufflevector <8 x i64> %mod, <8 x i64> undef, <8 x i32> zeroinitializer
@@ -248,10 +245,8 @@ define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwi
define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_modulo_shift_v16i32:
; ALL: # %bb.0:
-; ALL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
-; ALL-NEXT: vpbroadcastd %xmm1, %zmm1
-; ALL-NEXT: vpsravd %zmm1, %zmm0, %zmm0
+; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; ALL-NEXT: vpsrad %xmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%mod = and <16 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
%splat = shufflevector <16 x i32> %mod, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -272,8 +267,7 @@ define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) no
; AVX512BW-LABEL: splatvar_modulo_shift_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %zmm1
-; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%mod = and <32 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%splat = shufflevector <32 x i16> %mod, <32 x i16> undef, <32 x i32> zeroinitializer
@@ -501,9 +495,7 @@ define <8 x i64> @PR52719(<8 x i64> %a0, i32 %a1) {
; ALL-LABEL: PR52719:
; ALL: # %bb.0:
; ALL-NEXT: vmovd %edi, %xmm1
-; ALL-NEXT: vpbroadcastd %xmm1, %ymm1
-; ALL-NEXT: vpmovzxdq {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero
-; ALL-NEXT: vpsravq %zmm1, %zmm0, %zmm0
+; ALL-NEXT: vpsraq %xmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%vec = insertelement <8 x i32> poison, i32 %a1, i64 0
%splat = shufflevector <8 x i32> %vec, <8 x i32> poison, <8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
index 41c900b1f6879c1..36a6226f8f4b9c2 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
@@ -1167,40 +1167,28 @@ define <2 x i32> @splatvar_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
; SSE41-NEXT: psrad %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX1-LABEL: splatvar_shift_v2i32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: splatvar_shift_v2i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX2-NEXT: vpsravd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; XOPAVX1-LABEL: splatvar_shift_v2i32:
-; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; XOPAVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
-; XOPAVX1-NEXT: retq
+; AVX-LABEL: splatvar_shift_v2i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
;
-; XOPAVX2-LABEL: splatvar_shift_v2i32:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
-; XOPAVX2-NEXT: vpsravd %xmm1, %xmm0, %xmm0
-; XOPAVX2-NEXT: retq
+; XOP-LABEL: splatvar_shift_v2i32:
+; XOP: # %bb.0:
+; XOP-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOP-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v2i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX512-NEXT: vpsravd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v2i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX512VL-NEXT: vpsravd %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512VL-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v2i32:
@@ -1240,32 +1228,17 @@ define <4 x i16> @splatvar_shift_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
; XOP-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512DQ-LABEL: splatvar_shift_v4i16:
-; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512DQ-NEXT: vpsraw %xmm1, %xmm0, %xmm0
-; AVX512DQ-NEXT: retq
-;
-; AVX512BW-LABEL: splatvar_shift_v4i16:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512DQVL-LABEL: splatvar_shift_v4i16:
-; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512DQVL-NEXT: vpsraw %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT: retq
+; AVX512-LABEL: splatvar_shift_v4i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
;
-; AVX512BWVL-LABEL: splatvar_shift_v4i16:
-; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpsravw %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT: retq
+; AVX512VL-LABEL: splatvar_shift_v4i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v4i16:
; X86-SSE: # %bb.0:
@@ -1304,32 +1277,17 @@ define <2 x i16> @splatvar_shift_v2i16(<2 x i16> %a, <2 x i16> %b) nounwind {
; XOP-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512DQ-LABEL: splatvar_shift_v2i16:
-; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512DQ-NEXT: vpsraw %xmm1, %xmm0, %xmm0
-; AVX512DQ-NEXT: retq
-;
-; AVX512BW-LABEL: splatvar_shift_v2i16:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512DQVL-LABEL: splatvar_shift_v2i16:
-; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512DQVL-NEXT: vpsraw %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT: retq
+; AVX512-LABEL: splatvar_shift_v2i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
;
-; AVX512BWVL-LABEL: splatvar_shift_v2i16:
-; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpsravw %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT: retq
+; AVX512VL-LABEL: splatvar_shift_v2i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v2i16:
; X86-SSE: # %bb.0:
@@ -1423,20 +1381,18 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v8i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512DQ-NEXT: vpsravd %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpsrad %xmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v8i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -1444,20 +1400,18 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v8i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQVL-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpsravd %zmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpsrad %xmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v8i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BWVL-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512BWVL-NEXT: vpsravw %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -1565,20 +1519,18 @@ define <4 x i8> @splatvar_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v4i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512DQ-NEXT: vpsravd %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpsrad %xmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v4i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -1586,20 +1538,18 @@ define <4 x i8> @splatvar_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v4i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQVL-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpsravd %zmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpsrad %xmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v4i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BWVL-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512BWVL-NEXT: vpsravw %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -1698,20 +1648,18 @@ define <2 x i8> @splatvar_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v2i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512DQ-NEXT: vpsravd %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpsrad %xmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v2i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -1719,20 +1667,18 @@ define <2 x i8> @splatvar_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v2i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQVL-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpsravd %zmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpsrad %xmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v2i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BWVL-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512BWVL-NEXT: vpsravw %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
index a8c48d3f658c9ca..ca8343cd4812cdd 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
@@ -520,38 +520,24 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE-NEXT: psrlq %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: splatvar_shift_v2i64:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: splatvar_shift_v2i64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
-; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; XOPAVX1-LABEL: splatvar_shift_v2i64:
-; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
-; XOPAVX1-NEXT: retq
+; AVX-LABEL: splatvar_shift_v2i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
;
-; XOPAVX2-LABEL: splatvar_shift_v2i64:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastq %xmm1, %xmm1
-; XOPAVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
-; XOPAVX2-NEXT: retq
+; XOP-LABEL: splatvar_shift_v2i64:
+; XOP: # %bb.0:
+; XOP-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v2i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastq %xmm1, %xmm1
-; AVX512-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v2i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm1
-; AVX512VL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v2i64:
@@ -577,40 +563,28 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE41-NEXT: psrld %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX1-LABEL: splatvar_shift_v4i32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: splatvar_shift_v4i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; XOPAVX1-LABEL: splatvar_shift_v4i32:
-; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; XOPAVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
-; XOPAVX1-NEXT: retq
+; AVX-LABEL: splatvar_shift_v4i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
;
-; XOPAVX2-LABEL: splatvar_shift_v4i32:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
-; XOPAVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
-; XOPAVX2-NEXT: retq
+; XOP-LABEL: splatvar_shift_v4i32:
+; XOP: # %bb.0:
+; XOP-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOP-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v4i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX512-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v4i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX512VL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512VL-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v4i32:
@@ -650,32 +624,17 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; XOP-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512DQ-LABEL: splatvar_shift_v8i16:
-; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512DQ-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512DQ-NEXT: retq
-;
-; AVX512BW-LABEL: splatvar_shift_v8i16:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512DQVL-LABEL: splatvar_shift_v8i16:
-; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512DQVL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT: retq
+; AVX512-LABEL: splatvar_shift_v8i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
;
-; AVX512BWVL-LABEL: splatvar_shift_v8i16:
-; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT: retq
+; AVX512VL-LABEL: splatvar_shift_v8i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v8i16:
; X86-SSE: # %bb.0:
@@ -752,20 +711,18 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v16i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512DQ-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpsrld %xmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v16i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -773,20 +730,18 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v16i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpsrld %xmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v16i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512BWVL-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -820,44 +775,28 @@ define <2 x i64> @splatvar_modulo_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwi
; SSE-NEXT: psrlq %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: splatvar_modulo_shift_v2i64:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: splatvar_modulo_shift_v2i64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; XOPAVX1-LABEL: splatvar_modulo_shift_v2i64:
-; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
-; XOPAVX1-NEXT: retq
+; AVX-LABEL: splatvar_modulo_shift_v2i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
;
-; XOPAVX2-LABEL: splatvar_modulo_shift_v2i64:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastq %xmm1, %xmm1
-; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
-; XOPAVX2-NEXT: retq
+; XOP-LABEL: splatvar_modulo_shift_v2i64:
+; XOP: # %bb.0:
+; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOP-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v2i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v2i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
-; AVX512VL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_modulo_shift_v2i64:
@@ -878,47 +817,28 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
; SSE-NEXT: psrld %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: splatvar_modulo_shift_v4i32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: splatvar_modulo_shift_v4i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; XOPAVX1-LABEL: splatvar_modulo_shift_v4i32:
-; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
-; XOPAVX1-NEXT: retq
+; AVX-LABEL: splatvar_modulo_shift_v4i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
;
-; XOPAVX2-LABEL: splatvar_modulo_shift_v4i32:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
-; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; XOPAVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
-; XOPAVX2-NEXT: retq
+; XOP-LABEL: splatvar_modulo_shift_v4i32:
+; XOP: # %bb.0:
+; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOP-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v4i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v4i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
-; AVX512VL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_modulo_shift_v4i32:
@@ -951,34 +871,17 @@ define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwi
; XOP-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512DQ-LABEL: splatvar_modulo_shift_v8i16:
-; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512DQ-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512DQ-NEXT: retq
-;
-; AVX512BW-LABEL: splatvar_modulo_shift_v8i16:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512DQVL-LABEL: splatvar_modulo_shift_v8i16:
-; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512DQVL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT: retq
+; AVX512-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
;
-; AVX512BWVL-LABEL: splatvar_modulo_shift_v8i16:
-; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT: retq
+; AVX512VL-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_modulo_shift_v8i16:
; X86-SSE: # %bb.0:
@@ -1056,22 +959,20 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
;
; AVX512DQ-LABEL: splatvar_modulo_shift_v16i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512DQ-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpsrld %xmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_modulo_shift_v16i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -1079,22 +980,20 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
;
; AVX512DQVL-LABEL: splatvar_modulo_shift_v16i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpsrld %xmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_modulo_shift_v16i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512BWVL-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
index 6cc1261defbe82f..e65f78e49dc8d27 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
@@ -534,8 +534,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; AVX2-LABEL: splatvar_shift_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
-; AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v4i64:
@@ -548,20 +547,17 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; XOPAVX2-LABEL: splatvar_shift_v4i64:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastq %xmm1, %ymm1
-; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v4i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastq %xmm1, %ymm1
-; AVX512-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v4i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq %xmm1, %ymm1
-; AVX512VL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_shift_v4i64:
@@ -574,8 +570,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; X86-AVX2-LABEL: splatvar_shift_v4i64:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
-; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
%shift = lshr <4 x i64> %a, %splat
@@ -594,8 +589,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; AVX2-LABEL: splatvar_shift_v8i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
-; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v8i32:
@@ -609,20 +604,20 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; XOPAVX2-LABEL: splatvar_shift_v8i32:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastd %xmm1, %ymm1
-; XOPAVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v8i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastd %xmm1, %ymm1
-; AVX512-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512-NEXT: vpsrld %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v8i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastd %xmm1, %ymm1
-; AVX512VL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512VL-NEXT: vpsrld %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_shift_v8i32:
@@ -636,8 +631,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; X86-AVX2-LABEL: splatvar_shift_v8i32:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
-; X86-AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X86-AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
%shift = lshr <8 x i32> %a, %splat
@@ -675,31 +670,17 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
; XOPAVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
-; AVX512DQ-LABEL: splatvar_shift_v16i16:
-; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512DQ-NEXT: retq
-;
-; AVX512BW-LABEL: splatvar_shift_v16i16:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %ymm1
-; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
-; AVX512BW-NEXT: retq
-;
-; AVX512DQVL-LABEL: splatvar_shift_v16i16:
-; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512DQVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512DQVL-NEXT: retq
+; AVX512-LABEL: splatvar_shift_v16i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
;
-; AVX512BWVL-LABEL: splatvar_shift_v16i16:
-; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %ymm1
-; AVX512BWVL-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
-; AVX512BWVL-NEXT: retq
+; AVX512VL-LABEL: splatvar_shift_v16i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_shift_v16i16:
; X86-AVX1: # %bb.0:
@@ -781,10 +762,9 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
;
; AVX512BW-LABEL: splatvar_shift_v32i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
-; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
-; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
;
@@ -801,10 +781,9 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
;
; AVX512BWVL-LABEL: splatvar_shift_v32i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
-; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
-; AVX512BWVL-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT: retq
;
@@ -853,10 +832,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
;
; AVX2-LABEL: splatvar_modulo_shift_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
-; AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v4i64:
@@ -870,25 +847,20 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
;
; XOPAVX2-LABEL: splatvar_modulo_shift_v4i64:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
-; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; XOPAVX2-NEXT: vpbroadcastq %xmm1, %ymm1
-; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v4i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
-; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512-NEXT: vpbroadcastq %xmm1, %ymm1
-; AVX512-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
-; AVX512VL-NEXT: vpbroadcastq %xmm1, %ymm1
-; AVX512VL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v4i64:
@@ -903,8 +875,7 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
; X86-AVX2-LABEL: splatvar_modulo_shift_v4i64:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
-; X86-AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
-; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <4 x i64> %b, <i64 63, i64 63, i64 63, i64 63>
%splat = shufflevector <4 x i64> %mod, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -924,10 +895,8 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; AVX2-LABEL: splatvar_modulo_shift_v8i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
-; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v8i32:
@@ -941,25 +910,20 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; XOPAVX2-LABEL: splatvar_modulo_shift_v8i32:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; XOPAVX2-NEXT: vpbroadcastd %xmm1, %ymm1
-; XOPAVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v8i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512-NEXT: vpbroadcastd %xmm1, %ymm1
-; AVX512-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpsrld %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v8i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
-; AVX512VL-NEXT: vpbroadcastd %xmm1, %ymm1
-; AVX512VL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpsrld %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v8i32:
@@ -973,10 +937,8 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; X86-AVX2-LABEL: splatvar_modulo_shift_v8i32:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; X86-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; X86-AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
-; X86-AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
+; X86-AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
%splat = shufflevector <8 x i32> %mod, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -1015,33 +977,17 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; XOPAVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
-; AVX512DQ-LABEL: splatvar_modulo_shift_v16i16:
-; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512DQ-NEXT: retq
-;
-; AVX512BW-LABEL: splatvar_modulo_shift_v16i16:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %ymm1
-; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
-; AVX512BW-NEXT: retq
-;
-; AVX512DQVL-LABEL: splatvar_modulo_shift_v16i16:
-; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512DQVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512DQVL-NEXT: retq
+; AVX512-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
;
-; AVX512BWVL-LABEL: splatvar_modulo_shift_v16i16:
-; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %ymm1
-; AVX512BWVL-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
-; AVX512BWVL-NEXT: retq
+; AVX512VL-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v16i16:
; X86-AVX1: # %bb.0:
@@ -1126,11 +1072,10 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
;
; AVX512BW-LABEL: splatvar_modulo_shift_v32i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
-; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
-; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
;
@@ -1147,11 +1092,10 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
;
; AVX512BWVL-LABEL: splatvar_modulo_shift_v32i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
-; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
-; AVX512BWVL-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
index 1fbd0ea9b9f1573..efd73b4ca132bbf 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
@@ -112,8 +112,7 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_shift_v8i64:
; ALL: # %bb.0:
-; ALL-NEXT: vpbroadcastq %xmm1, %zmm1
-; ALL-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0
+; ALL-NEXT: vpsrlq %xmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
%shift = lshr <8 x i64> %a, %splat
@@ -123,8 +122,8 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_shift_v16i32:
; ALL: # %bb.0:
-; ALL-NEXT: vpbroadcastd %xmm1, %zmm1
-; ALL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
+; ALL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; ALL-NEXT: vpsrld %xmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
%shift = lshr <16 x i32> %a, %splat
@@ -143,8 +142,8 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind
;
; AVX512BW-LABEL: splatvar_shift_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %zmm1
-; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
%shift = lshr <32 x i16> %a, %splat
@@ -189,10 +188,8 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_modulo_shift_v8i64:
; ALL: # %bb.0:
-; ALL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
-; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
-; ALL-NEXT: vpbroadcastq %xmm1, %zmm1
-; ALL-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0
+; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; ALL-NEXT: vpsrlq %xmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%mod = and <8 x i64> %b, <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>
%splat = shufflevector <8 x i64> %mod, <8 x i64> undef, <8 x i32> zeroinitializer
@@ -203,10 +200,8 @@ define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwi
define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_modulo_shift_v16i32:
; ALL: # %bb.0:
-; ALL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
-; ALL-NEXT: vpbroadcastd %xmm1, %zmm1
-; ALL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
+; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; ALL-NEXT: vpsrld %xmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%mod = and <16 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
%splat = shufflevector <16 x i32> %mod, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -227,8 +222,7 @@ define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) no
; AVX512BW-LABEL: splatvar_modulo_shift_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %zmm1
-; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%mod = and <32 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%splat = shufflevector <32 x i16> %mod, <32 x i16> undef, <32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
index 108e383dd7a029f..74ba1d04161f87b 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
@@ -933,40 +933,28 @@ define <2 x i32> @splatvar_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
; SSE41-NEXT: psrld %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX1-LABEL: splatvar_shift_v2i32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: splatvar_shift_v2i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; XOPAVX1-LABEL: splatvar_shift_v2i32:
-; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; XOPAVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
-; XOPAVX1-NEXT: retq
+; AVX-LABEL: splatvar_shift_v2i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
;
-; XOPAVX2-LABEL: splatvar_shift_v2i32:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
-; XOPAVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
-; XOPAVX2-NEXT: retq
+; XOP-LABEL: splatvar_shift_v2i32:
+; XOP: # %bb.0:
+; XOP-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOP-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v2i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX512-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v2i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX512VL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512VL-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v2i32:
@@ -1006,32 +994,17 @@ define <4 x i16> @splatvar_shift_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
; XOP-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512DQ-LABEL: splatvar_shift_v4i16:
-; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512DQ-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512DQ-NEXT: retq
-;
-; AVX512BW-LABEL: splatvar_shift_v4i16:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512DQVL-LABEL: splatvar_shift_v4i16:
-; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512DQVL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT: retq
+; AVX512-LABEL: splatvar_shift_v4i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
;
-; AVX512BWVL-LABEL: splatvar_shift_v4i16:
-; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT: retq
+; AVX512VL-LABEL: splatvar_shift_v4i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v4i16:
; X86-SSE: # %bb.0:
@@ -1070,32 +1043,17 @@ define <2 x i16> @splatvar_shift_v2i16(<2 x i16> %a, <2 x i16> %b) nounwind {
; XOP-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512DQ-LABEL: splatvar_shift_v2i16:
-; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512DQ-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512DQ-NEXT: retq
-;
-; AVX512BW-LABEL: splatvar_shift_v2i16:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512DQVL-LABEL: splatvar_shift_v2i16:
-; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512DQVL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT: retq
+; AVX512-LABEL: splatvar_shift_v2i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
;
-; AVX512BWVL-LABEL: splatvar_shift_v2i16:
-; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT: retq
+; AVX512VL-LABEL: splatvar_shift_v2i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v2i16:
; X86-SSE: # %bb.0:
@@ -1173,20 +1131,18 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v8i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512DQ-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpsrld %xmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v8i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -1194,20 +1150,18 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v8i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpsrld %xmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v8i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512BWVL-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -1295,20 +1249,18 @@ define <4 x i8> @splatvar_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v4i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512DQ-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpsrld %xmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v4i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -1316,20 +1268,18 @@ define <4 x i8> @splatvar_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v4i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpsrld %xmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v4i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512BWVL-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -1408,20 +1358,18 @@ define <2 x i8> @splatvar_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v2i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512DQ-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpsrld %xmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v2i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -1429,20 +1377,18 @@ define <2 x i8> @splatvar_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v2i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpsrld %xmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v2i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512BWVL-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
index 4b3b35e9fee8752..c54da38ef10cc18 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
@@ -431,38 +431,24 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE-NEXT: psllq %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: splatvar_shift_v2i64:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: splatvar_shift_v2i64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
-; AVX2-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; XOPAVX1-LABEL: splatvar_shift_v2i64:
-; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
-; XOPAVX1-NEXT: retq
+; AVX-LABEL: splatvar_shift_v2i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
;
-; XOPAVX2-LABEL: splatvar_shift_v2i64:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastq %xmm1, %xmm1
-; XOPAVX2-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
-; XOPAVX2-NEXT: retq
+; XOP-LABEL: splatvar_shift_v2i64:
+; XOP: # %bb.0:
+; XOP-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v2i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastq %xmm1, %xmm1
-; AVX512-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v2i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm1
-; AVX512VL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v2i64:
@@ -488,40 +474,28 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE41-NEXT: pslld %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX1-LABEL: splatvar_shift_v4i32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: splatvar_shift_v4i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; XOPAVX1-LABEL: splatvar_shift_v4i32:
-; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; XOPAVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
-; XOPAVX1-NEXT: retq
+; AVX-LABEL: splatvar_shift_v4i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
;
-; XOPAVX2-LABEL: splatvar_shift_v4i32:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
-; XOPAVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
-; XOPAVX2-NEXT: retq
+; XOP-LABEL: splatvar_shift_v4i32:
+; XOP: # %bb.0:
+; XOP-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOP-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v4i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX512-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512-NEXT: vpslld %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v4i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX512VL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512VL-NEXT: vpslld %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v4i32:
@@ -561,32 +535,17 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; XOP-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512DQ-LABEL: splatvar_shift_v8i16:
-; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512DQ-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX512DQ-NEXT: retq
-;
-; AVX512BW-LABEL: splatvar_shift_v8i16:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512DQVL-LABEL: splatvar_shift_v8i16:
-; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512DQVL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT: retq
+; AVX512-LABEL: splatvar_shift_v8i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
;
-; AVX512BWVL-LABEL: splatvar_shift_v8i16:
-; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpsllvw %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT: retq
+; AVX512VL-LABEL: splatvar_shift_v8i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v8i16:
; X86-SSE: # %bb.0:
@@ -660,20 +619,18 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v16i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpslld %xmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v16i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -681,20 +638,18 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v16i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpslld %xmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v16i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -727,44 +682,28 @@ define <2 x i64> @splatvar_modulo_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwi
; SSE-NEXT: psllq %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: splatvar_modulo_shift_v2i64:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: splatvar_modulo_shift_v2i64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; XOPAVX1-LABEL: splatvar_modulo_shift_v2i64:
-; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
-; XOPAVX1-NEXT: retq
+; AVX-LABEL: splatvar_modulo_shift_v2i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
;
-; XOPAVX2-LABEL: splatvar_modulo_shift_v2i64:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastq %xmm1, %xmm1
-; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX2-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
-; XOPAVX2-NEXT: retq
+; XOP-LABEL: splatvar_modulo_shift_v2i64:
+; XOP: # %bb.0:
+; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOP-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v2i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v2i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
-; AVX512VL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_modulo_shift_v2i64:
@@ -785,47 +724,28 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
; SSE-NEXT: pslld %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: splatvar_modulo_shift_v4i32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: splatvar_modulo_shift_v4i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; XOPAVX1-LABEL: splatvar_modulo_shift_v4i32:
-; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
-; XOPAVX1-NEXT: retq
+; AVX-LABEL: splatvar_modulo_shift_v4i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
;
-; XOPAVX2-LABEL: splatvar_modulo_shift_v4i32:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
-; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; XOPAVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
-; XOPAVX2-NEXT: retq
+; XOP-LABEL: splatvar_modulo_shift_v4i32:
+; XOP: # %bb.0:
+; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOP-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v4i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpslld %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v4i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
-; AVX512VL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpslld %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_modulo_shift_v4i32:
@@ -858,34 +778,17 @@ define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwi
; XOP-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512DQ-LABEL: splatvar_modulo_shift_v8i16:
-; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512DQ-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX512DQ-NEXT: retq
-;
-; AVX512BW-LABEL: splatvar_modulo_shift_v8i16:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512DQVL-LABEL: splatvar_modulo_shift_v8i16:
-; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512DQVL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT: retq
+; AVX512-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
;
-; AVX512BWVL-LABEL: splatvar_modulo_shift_v8i16:
-; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpsllvw %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT: retq
+; AVX512VL-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_modulo_shift_v8i16:
; X86-SSE: # %bb.0:
@@ -960,22 +863,20 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
;
; AVX512DQ-LABEL: splatvar_modulo_shift_v16i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpslld %xmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_modulo_shift_v16i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -983,22 +884,20 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
;
; AVX512DQVL-LABEL: splatvar_modulo_shift_v16i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpslld %xmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_modulo_shift_v16i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
index 9f0b8a079517bd2..6dde209e94d8114 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
@@ -464,8 +464,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; AVX2-LABEL: splatvar_shift_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
-; AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v4i64:
@@ -478,20 +477,17 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; XOPAVX2-LABEL: splatvar_shift_v4i64:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastq %xmm1, %ymm1
-; XOPAVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v4i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastq %xmm1, %ymm1
-; AVX512-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpsllq %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v4i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq %xmm1, %ymm1
-; AVX512VL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpsllq %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_shift_v4i64:
@@ -504,8 +500,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; X86-AVX2-LABEL: splatvar_shift_v4i64:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
-; X86-AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
%shift = shl <4 x i64> %a, %splat
@@ -524,8 +519,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; AVX2-LABEL: splatvar_shift_v8i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
-; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v8i32:
@@ -539,20 +534,20 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; XOPAVX2-LABEL: splatvar_shift_v8i32:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastd %xmm1, %ymm1
-; XOPAVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v8i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastd %xmm1, %ymm1
-; AVX512-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512-NEXT: vpslld %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v8i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastd %xmm1, %ymm1
-; AVX512VL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512VL-NEXT: vpslld %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_shift_v8i32:
@@ -566,8 +561,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; X86-AVX2-LABEL: splatvar_shift_v8i32:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
-; X86-AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X86-AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
%shift = shl <8 x i32> %a, %splat
@@ -605,31 +600,17 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
; XOPAVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
-; AVX512DQ-LABEL: splatvar_shift_v16i16:
-; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; AVX512DQ-NEXT: retq
-;
-; AVX512BW-LABEL: splatvar_shift_v16i16:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %ymm1
-; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
-; AVX512BW-NEXT: retq
-;
-; AVX512DQVL-LABEL: splatvar_shift_v16i16:
-; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512DQVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; AVX512DQVL-NEXT: retq
+; AVX512-LABEL: splatvar_shift_v16i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
;
-; AVX512BWVL-LABEL: splatvar_shift_v16i16:
-; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %ymm1
-; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
-; AVX512BWVL-NEXT: retq
+; AVX512VL-LABEL: splatvar_shift_v16i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_shift_v16i16:
; X86-AVX1: # %bb.0:
@@ -707,10 +688,9 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
;
; AVX512BW-LABEL: splatvar_shift_v32i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
-; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
-; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
;
@@ -726,10 +706,9 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
;
; AVX512BWVL-LABEL: splatvar_shift_v32i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
-; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
-; AVX512BWVL-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT: retq
;
@@ -778,10 +757,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
;
; AVX2-LABEL: splatvar_modulo_shift_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
-; AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v4i64:
@@ -795,25 +772,20 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
;
; XOPAVX2-LABEL: splatvar_modulo_shift_v4i64:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
-; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; XOPAVX2-NEXT: vpbroadcastq %xmm1, %ymm1
-; XOPAVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v4i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
-; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512-NEXT: vpbroadcastq %xmm1, %ymm1
-; AVX512-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpsllq %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
-; AVX512VL-NEXT: vpbroadcastq %xmm1, %ymm1
-; AVX512VL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpsllq %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v4i64:
@@ -828,8 +800,7 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
; X86-AVX2-LABEL: splatvar_modulo_shift_v4i64:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
-; X86-AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
-; X86-AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <4 x i64> %b, <i64 63, i64 63, i64 63, i64 63>
%splat = shufflevector <4 x i64> %mod, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -849,10 +820,8 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; AVX2-LABEL: splatvar_modulo_shift_v8i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
-; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v8i32:
@@ -866,25 +835,20 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; XOPAVX2-LABEL: splatvar_modulo_shift_v8i32:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; XOPAVX2-NEXT: vpbroadcastd %xmm1, %ymm1
-; XOPAVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v8i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512-NEXT: vpbroadcastd %xmm1, %ymm1
-; AVX512-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpslld %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v8i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
-; AVX512VL-NEXT: vpbroadcastd %xmm1, %ymm1
-; AVX512VL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpslld %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v8i32:
@@ -898,10 +862,8 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; X86-AVX2-LABEL: splatvar_modulo_shift_v8i32:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; X86-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; X86-AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
-; X86-AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
+; X86-AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
%splat = shufflevector <8 x i32> %mod, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -940,33 +902,17 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; XOPAVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
-; AVX512DQ-LABEL: splatvar_modulo_shift_v16i16:
-; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; AVX512DQ-NEXT: retq
-;
-; AVX512BW-LABEL: splatvar_modulo_shift_v16i16:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %ymm1
-; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
-; AVX512BW-NEXT: retq
-;
-; AVX512DQVL-LABEL: splatvar_modulo_shift_v16i16:
-; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512DQVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; AVX512DQVL-NEXT: retq
+; AVX512-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
;
-; AVX512BWVL-LABEL: splatvar_modulo_shift_v16i16:
-; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %ymm1
-; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
-; AVX512BWVL-NEXT: retq
+; AVX512VL-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v16i16:
; X86-AVX1: # %bb.0:
@@ -1047,11 +993,10 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
;
; AVX512BW-LABEL: splatvar_modulo_shift_v32i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
-; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
-; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
;
@@ -1067,11 +1012,10 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
;
; AVX512BWVL-LABEL: splatvar_modulo_shift_v32i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
-; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
-; AVX512BWVL-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-512.ll b/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
index bc03821cea22e11..8eca56d099feb38 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
@@ -107,8 +107,7 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_shift_v8i64:
; ALL: # %bb.0:
-; ALL-NEXT: vpbroadcastq %xmm1, %zmm1
-; ALL-NEXT: vpsllvq %zmm1, %zmm0, %zmm0
+; ALL-NEXT: vpsllq %xmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
%shift = shl <8 x i64> %a, %splat
@@ -118,8 +117,8 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_shift_v16i32:
; ALL: # %bb.0:
-; ALL-NEXT: vpbroadcastd %xmm1, %zmm1
-; ALL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
+; ALL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; ALL-NEXT: vpslld %xmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
%shift = shl <16 x i32> %a, %splat
@@ -138,8 +137,8 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind
;
; AVX512BW-LABEL: splatvar_shift_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %zmm1
-; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
%shift = shl <32 x i16> %a, %splat
@@ -182,10 +181,8 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_modulo_shift_v8i64:
; ALL: # %bb.0:
-; ALL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
-; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
-; ALL-NEXT: vpbroadcastq %xmm1, %zmm1
-; ALL-NEXT: vpsllvq %zmm1, %zmm0, %zmm0
+; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; ALL-NEXT: vpsllq %xmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%mod = and <8 x i64> %b, <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>
%splat = shufflevector <8 x i64> %mod, <8 x i64> undef, <8 x i32> zeroinitializer
@@ -196,10 +193,8 @@ define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwi
define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_modulo_shift_v16i32:
; ALL: # %bb.0:
-; ALL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
-; ALL-NEXT: vpbroadcastd %xmm1, %zmm1
-; ALL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
+; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; ALL-NEXT: vpslld %xmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%mod = and <16 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
%splat = shufflevector <16 x i32> %mod, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -220,8 +215,7 @@ define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) no
; AVX512BW-LABEL: splatvar_modulo_shift_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %zmm1
-; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%mod = and <32 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%splat = shufflevector <32 x i16> %mod, <32 x i16> undef, <32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll
index 08150335b63bb3c..d545cb77cba2e49 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll
@@ -786,40 +786,28 @@ define <2 x i32> @splatvar_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
; SSE41-NEXT: pslld %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX1-LABEL: splatvar_shift_v2i32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: splatvar_shift_v2i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; XOPAVX1-LABEL: splatvar_shift_v2i32:
-; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; XOPAVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
-; XOPAVX1-NEXT: retq
+; AVX-LABEL: splatvar_shift_v2i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
;
-; XOPAVX2-LABEL: splatvar_shift_v2i32:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
-; XOPAVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
-; XOPAVX2-NEXT: retq
+; XOP-LABEL: splatvar_shift_v2i32:
+; XOP: # %bb.0:
+; XOP-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOP-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v2i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX512-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512-NEXT: vpslld %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v2i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastd %xmm1, %xmm1
-; AVX512VL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512VL-NEXT: vpslld %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v2i32:
@@ -859,32 +847,17 @@ define <4 x i16> @splatvar_shift_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
; XOP-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512DQ-LABEL: splatvar_shift_v4i16:
-; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512DQ-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX512DQ-NEXT: retq
-;
-; AVX512BW-LABEL: splatvar_shift_v4i16:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512DQVL-LABEL: splatvar_shift_v4i16:
-; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512DQVL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT: retq
+; AVX512-LABEL: splatvar_shift_v4i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
;
-; AVX512BWVL-LABEL: splatvar_shift_v4i16:
-; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpsllvw %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT: retq
+; AVX512VL-LABEL: splatvar_shift_v4i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v4i16:
; X86-SSE: # %bb.0:
@@ -923,32 +896,17 @@ define <2 x i16> @splatvar_shift_v2i16(<2 x i16> %a, <2 x i16> %b) nounwind {
; XOP-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512DQ-LABEL: splatvar_shift_v2i16:
-; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512DQ-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX512DQ-NEXT: retq
-;
-; AVX512BW-LABEL: splatvar_shift_v2i16:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512DQVL-LABEL: splatvar_shift_v2i16:
-; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512DQVL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT: retq
+; AVX512-LABEL: splatvar_shift_v2i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
;
-; AVX512BWVL-LABEL: splatvar_shift_v2i16:
-; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpsllvw %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT: retq
+; AVX512VL-LABEL: splatvar_shift_v2i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v2i16:
; X86-SSE: # %bb.0:
@@ -1022,20 +980,18 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v8i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpslld %xmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v8i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -1043,20 +999,18 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v8i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpslld %xmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v8i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -1139,20 +1093,18 @@ define <4 x i8> @splatvar_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v4i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpslld %xmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v4i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -1160,20 +1112,18 @@ define <4 x i8> @splatvar_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v4i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpslld %xmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v4i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -1249,20 +1199,18 @@ define <2 x i8> @splatvar_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v2i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpslld %xmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v2i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -1270,20 +1218,18 @@ define <2 x i8> @splatvar_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v2i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpslld %xmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v2i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vselect-avx.ll b/llvm/test/CodeGen/X86/vselect-avx.ll
index ee3c0da25537692..8dda27145bd374e 100644
--- a/llvm/test/CodeGen/X86/vselect-avx.ll
+++ b/llvm/test/CodeGen/X86/vselect-avx.ll
@@ -227,10 +227,10 @@ define void @blendv_split(ptr %p, <8 x i32> %cond, <8 x i32> %a, <8 x i32> %x, <
;
; AVX2-LABEL: blendv_split:
; AVX2: ## %bb.0:
-; AVX2-NEXT: vpbroadcastd %xmm2, %ymm2
-; AVX2-NEXT: vpbroadcastd %xmm3, %ymm3
-; AVX2-NEXT: vpsllvd %ymm2, %ymm1, %ymm2
-; AVX2-NEXT: vpsllvd %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
+; AVX2-NEXT: vpslld %xmm2, %ymm1, %ymm2
+; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero
+; AVX2-NEXT: vpslld %xmm3, %ymm1, %ymm1
; AVX2-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT: vmovups %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
@@ -241,10 +241,10 @@ define void @blendv_split(ptr %p, <8 x i32> %cond, <8 x i32> %a, <8 x i32> %x, <
; AVX512-NEXT: vpsrld $31, %ymm0, %ymm0
; AVX512-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512-NEXT: vptestmd %ymm0, %ymm0, %k1
-; AVX512-NEXT: vpbroadcastd %xmm2, %ymm0
-; AVX512-NEXT: vpbroadcastd %xmm3, %ymm2
-; AVX512-NEXT: vpsllvd %ymm2, %ymm1, %ymm2
-; AVX512-NEXT: vpsllvd %ymm0, %ymm1, %ymm2 {%k1}
+; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero
+; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero
+; AVX512-NEXT: vpslld %xmm2, %ymm1, %ymm2
+; AVX512-NEXT: vpslld %xmm0, %ymm1, %ymm2 {%k1}
; AVX512-NEXT: vmovdqu %ymm2, (%rdi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
>From fa905024e48ae3b16db58eb755bb6cf2e0acc396 Mon Sep 17 00:00:00 2001
From: SahilPatidar <patidarsahil2001 at gmail.com>
Date: Mon, 22 Apr 2024 13:36:17 +0530
Subject: [PATCH 3/3] add and update test
---
.../test/CodeGen/X86/vector-shift-ashr-128.ll | 189 +++++--
.../test/CodeGen/X86/vector-shift-ashr-256.ll | 492 ++++++++++------
.../test/CodeGen/X86/vector-shift-lshr-128.ll | 203 ++++++-
.../test/CodeGen/X86/vector-shift-lshr-256.ll | 453 ++++++++++-----
llvm/test/CodeGen/X86/vector-shift-shl-128.ll | 203 ++++++-
llvm/test/CodeGen/X86/vector-shift-shl-256.ll | 532 +++++++++++++-----
6 files changed, 1533 insertions(+), 539 deletions(-)
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
index 53b6aca3e9fcba9..7f579e0172a11af 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
@@ -2,7 +2,8 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+tuning-fast-per-element-vector-shift | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOP,XOPAVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOP,XOPAVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ
@@ -643,14 +644,24 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: splatvar_shift_v2i64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
-; AVX2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpsubq %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: retq
+; AVX2-SLOW-LABEL: splatvar_shift_v2i64:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-SLOW-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
+; AVX2-SLOW-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: vpsubq %xmm2, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: splatvar_shift_v2i64:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-FAST-NEXT: vpsrlvq %xmm1, %xmm2, %xmm2
+; AVX2-FAST-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; AVX2-FAST-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-FAST-NEXT: vpsubq %xmm2, %xmm0, %xmm0
+; AVX2-FAST-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v2i64:
; XOPAVX1: # %bb.0:
@@ -708,11 +719,23 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE41-NEXT: psrad %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: splatvar_shift_v4i32:
-; AVX: # %bb.0:
-; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX-NEXT: vpsrad %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_shift_v4i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: splatvar_shift_v4i32:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX2-SLOW-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: splatvar_shift_v4i32:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpsravd %xmm1, %xmm0, %xmm0
+; AVX2-FAST-NEXT: retq
;
; XOP-LABEL: splatvar_shift_v4i32:
; XOP: # %bb.0:
@@ -955,15 +978,26 @@ define <2 x i64> @splatvar_modulo_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwi
; AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: splatvar_modulo_shift_v2i64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
-; AVX2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpsubq %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: retq
+; AVX2-SLOW-LABEL: splatvar_modulo_shift_v2i64:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-SLOW-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
+; AVX2-SLOW-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: vpsubq %xmm2, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: splatvar_modulo_shift_v2i64:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-FAST-NEXT: vpsrlvq %xmm1, %xmm2, %xmm2
+; AVX2-FAST-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; AVX2-FAST-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-FAST-NEXT: vpsubq %xmm2, %xmm0, %xmm0
+; AVX2-FAST-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v2i64:
; XOPAVX1: # %bb.0:
@@ -1020,11 +1054,25 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
; SSE-NEXT: psrad %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: splatvar_modulo_shift_v4i32:
-; AVX: # %bb.0:
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-NEXT: vpsrad %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_modulo_shift_v4i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: splatvar_modulo_shift_v4i32:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-SLOW-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: splatvar_modulo_shift_v4i32:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX2-FAST-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpsravd %xmm1, %xmm0, %xmm0
+; AVX2-FAST-NEXT: retq
;
; XOP-LABEL: splatvar_modulo_shift_v4i32:
; XOP: # %bb.0:
@@ -1812,15 +1860,26 @@ define <2 x i64> @PR52719(<2 x i64> %a0, i32 %a1) {
; AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: PR52719:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vmovd %edi, %xmm1
-; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
-; AVX2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpsubq %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: retq
+; AVX2-SLOW-LABEL: PR52719:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vmovd %edi, %xmm1
+; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-SLOW-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
+; AVX2-SLOW-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: vpsubq %xmm2, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: PR52719:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vmovd %edi, %xmm1
+; AVX2-FAST-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-FAST-NEXT: vpsrlvq %xmm1, %xmm2, %xmm2
+; AVX2-FAST-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; AVX2-FAST-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-FAST-NEXT: vpsubq %xmm2, %xmm0, %xmm0
+; AVX2-FAST-NEXT: retq
;
; XOPAVX1-LABEL: PR52719:
; XOPAVX1: # %bb.0:
@@ -1870,3 +1929,59 @@ define <2 x i64> @PR52719(<2 x i64> %a0, i32 %a1) {
%ashr = ashr <2 x i64> %a0, %zext
ret <2 x i64> %ashr
}
+
+
+define <4 x i32> @shift_splat_vec4i32(<4 x i32> %x, i32 %s) {
+; SSE-LABEL: shift_splat_vec4i32:
+; SSE: # %bb.0:
+; SSE-NEXT: movd %edi, %xmm1
+; SSE-NEXT: psrad %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: shift_splat_vec4i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovd %edi, %xmm1
+; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: shift_splat_vec4i32:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vmovd %edi, %xmm1
+; AVX2-SLOW-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shift_splat_vec4i32:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vmovd %edi, %xmm1
+; AVX2-FAST-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpsravd %xmm1, %xmm0, %xmm0
+; AVX2-FAST-NEXT: retq
+;
+; XOP-LABEL: shift_splat_vec4i32:
+; XOP: # %bb.0:
+; XOP-NEXT: vmovd %edi, %xmm1
+; XOP-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: shift_splat_vec4i32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmovd %edi, %xmm1
+; AVX512-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: shift_splat_vec4i32:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vmovd %edi, %xmm1
+; AVX512VL-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
+;
+; X86-SSE-LABEL: shift_splat_vec4i32:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-SSE-NEXT: psrad %xmm1, %xmm0
+; X86-SSE-NEXT: retl
+ %vec = insertelement <4 x i32> poison, i32 %s, i64 0
+ %splat = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> zeroinitializer
+ %shr = ashr <4 x i32> %x, %splat
+ ret <4 x i32> %shr
+}
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
index 9a483c345f92cb1..33469cc57251663 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+tuning-fast-per-element-vector-shift | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=XOPAVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=XOPAVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ
@@ -44,14 +45,14 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: var_shift_v4i64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
-; AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: var_shift_v4i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; AVX-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
+; AVX-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vpsubq %ymm2, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: var_shift_v4i64:
; XOPAVX1: # %bb.0:
@@ -157,10 +158,10 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: var_shift_v8i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpsravd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: var_shift_v8i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsravd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: var_shift_v8i32:
; XOPAVX1: # %bb.0:
@@ -263,19 +264,19 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: var_shift_v16i16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
-; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
-; AVX2-NEXT: vpsravd %ymm3, %ymm4, %ymm3
-; AVX2-NEXT: vpsrld $16, %ymm3, %ymm3
-; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
-; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
-; AVX2-NEXT: vpsravd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
-; AVX2-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: var_shift_v16i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
+; AVX-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
+; AVX-NEXT: vpsravd %ymm3, %ymm4, %ymm3
+; AVX-NEXT: vpsrld $16, %ymm3, %ymm3
+; AVX-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
+; AVX-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
+; AVX-NEXT: vpsravd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vpsrld $16, %ymm0, %ymm0
+; AVX-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: var_shift_v16i16:
; XOPAVX1: # %bb.0:
@@ -438,33 +439,33 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: var_shift_v32i8:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
-; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
-; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
-; AVX2-NEXT: vpsraw $4, %ymm3, %ymm4
-; AVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
-; AVX2-NEXT: vpsraw $2, %ymm3, %ymm4
-; AVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; AVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
-; AVX2-NEXT: vpsraw $1, %ymm3, %ymm4
-; AVX2-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; AVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
-; AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2
-; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
-; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
-; AVX2-NEXT: vpsraw $4, %ymm0, %ymm3
-; AVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
-; AVX2-NEXT: vpsraw $2, %ymm0, %ymm3
-; AVX2-NEXT: vpaddw %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
-; AVX2-NEXT: vpsraw $1, %ymm0, %ymm3
-; AVX2-NEXT: vpaddw %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
-; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
-; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: var_shift_v32i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsllw $5, %ymm1, %ymm1
+; AVX-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
+; AVX-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
+; AVX-NEXT: vpsraw $4, %ymm3, %ymm4
+; AVX-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; AVX-NEXT: vpsraw $2, %ymm3, %ymm4
+; AVX-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; AVX-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; AVX-NEXT: vpsraw $1, %ymm3, %ymm4
+; AVX-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; AVX-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
+; AVX-NEXT: vpsrlw $8, %ymm2, %ymm2
+; AVX-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX-NEXT: vpsraw $4, %ymm0, %ymm3
+; AVX-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX-NEXT: vpsraw $2, %ymm0, %ymm3
+; AVX-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; AVX-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX-NEXT: vpsraw $1, %ymm0, %ymm3
+; AVX-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; AVX-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX-NEXT: vpsrlw $8, %ymm0, %ymm0
+; AVX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: var_shift_v32i8:
; XOPAVX1: # %bb.0:
@@ -668,14 +669,24 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: splatvar_shift_v4i64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
-; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX2-SLOW-LABEL: splatvar_shift_v4i64:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; AVX2-SLOW-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
+; AVX2-SLOW-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX2-SLOW-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; AVX2-SLOW-NEXT: vpsubq %ymm2, %ymm0, %ymm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: splatvar_shift_v4i64:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; AVX2-FAST-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
+; AVX2-FAST-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; AVX2-FAST-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; AVX2-FAST-NEXT: vpsubq %ymm2, %ymm0, %ymm0
+; AVX2-FAST-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v4i64:
; XOPAVX1: # %bb.0:
@@ -747,11 +758,17 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: splatvar_shift_v8i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX2-SLOW-LABEL: splatvar_shift_v8i32:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX2-SLOW-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: splatvar_shift_v8i32:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX2-FAST-NEXT: vpsravd %ymm1, %ymm0, %ymm0
+; AVX2-FAST-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v8i32:
; XOPAVX1: # %bb.0:
@@ -809,11 +826,11 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: splatvar_shift_v16i16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: splatvar_shift_v16i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v16i16:
; XOPAVX1: # %bb.0:
@@ -882,20 +899,20 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: splatvar_shift_v32i8:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
-; AVX2-NEXT: vpsrlw $8, %xmm2, %xmm2
-; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2
-; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpbroadcastb {{.*#+}} ymm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX2-NEXT: vpsrlw %xmm1, %ymm2, %ymm1
-; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpsubb %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: splatvar_shift_v32i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
+; AVX-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX-NEXT: vpbroadcastb %xmm2, %ymm2
+; AVX-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vpbroadcastb {{.*#+}} ymm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; AVX-NEXT: vpsrlw %xmm1, %ymm2, %ymm1
+; AVX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vpsubb %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v32i8:
; XOPAVX1: # %bb.0:
@@ -1024,15 +1041,27 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: splatvar_modulo_shift_v4i64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
-; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX2-SLOW-LABEL: splatvar_modulo_shift_v4i64:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; AVX2-SLOW-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
+; AVX2-SLOW-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX2-SLOW-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; AVX2-SLOW-NEXT: vpsubq %ymm2, %ymm0, %ymm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: splatvar_modulo_shift_v4i64:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; AVX2-FAST-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; AVX2-FAST-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
+; AVX2-FAST-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; AVX2-FAST-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; AVX2-FAST-NEXT: vpsubq %ymm2, %ymm0, %ymm0
+; AVX2-FAST-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v4i64:
; XOPAVX1: # %bb.0:
@@ -1111,11 +1140,19 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: splatvar_modulo_shift_v8i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX2-SLOW-LABEL: splatvar_modulo_shift_v8i32:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-SLOW-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: splatvar_modulo_shift_v8i32:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX2-FAST-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX2-FAST-NEXT: vpsravd %ymm1, %ymm0, %ymm0
+; AVX2-FAST-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v8i32:
; XOPAVX1: # %bb.0:
@@ -1174,11 +1211,11 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: splatvar_modulo_shift_v16i16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: splatvar_modulo_shift_v16i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v16i16:
; XOPAVX1: # %bb.0:
@@ -1248,20 +1285,20 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: splatvar_modulo_shift_v32i8:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
-; AVX2-NEXT: vpsrlw $8, %xmm2, %xmm2
-; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2
-; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpbroadcastb {{.*#+}} ymm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX2-NEXT: vpsrlw %xmm1, %ymm2, %ymm1
-; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpsubb %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: splatvar_modulo_shift_v32i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
+; AVX-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX-NEXT: vpbroadcastb %xmm2, %ymm2
+; AVX-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vpbroadcastb {{.*#+}} ymm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; AVX-NEXT: vpsrlw %xmm1, %ymm2, %ymm1
+; AVX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vpsubb %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v32i8:
; XOPAVX1: # %bb.0:
@@ -1397,13 +1434,13 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: constant_shift_v4i64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [4611686018427387904,72057594037927936,4294967296,2]
-; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpsubq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: constant_shift_v4i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [4611686018427387904,72057594037927936,4294967296,2]
+; AVX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vpsubq %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: constant_shift_v4i64:
; XOPAVX1: # %bb.0:
@@ -1482,10 +1519,10 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: constant_shift_v8i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: constant_shift_v8i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: constant_shift_v8i32:
; XOPAVX1: # %bb.0:
@@ -1548,14 +1585,14 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: constant_shift_v16i16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
-; AVX2-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0],xmm1[1,2,3,4,5,6,7]
-; AVX2-NEXT: vpsraw $1, %xmm0, %xmm0
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2,3,4,5,6,7]
-; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; AVX2-NEXT: retq
+; AVX-LABEL: constant_shift_v16i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0],xmm1[1,2,3,4,5,6,7]
+; AVX-NEXT: vpsraw $1, %xmm0, %xmm0
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2,3,4,5,6,7]
+; AVX-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: constant_shift_v16i16:
; XOPAVX1: # %bb.0:
@@ -1650,18 +1687,18 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: constant_shift_v32i8:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
-; AVX2-NEXT: vpsraw $8, %ymm1, %ymm1
-; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
-; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
-; AVX2-NEXT: vpsraw $8, %ymm0, %ymm0
-; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
-; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: constant_shift_v32i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
+; AVX-NEXT: vpsraw $8, %ymm1, %ymm1
+; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX-NEXT: vpsrlw $8, %ymm1, %ymm1
+; AVX-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX-NEXT: vpsraw $8, %ymm0, %ymm0
+; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX-NEXT: vpsrlw $8, %ymm0, %ymm0
+; AVX-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: constant_shift_v32i8:
; XOPAVX1: # %bb.0:
@@ -1780,12 +1817,12 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: splatconstant_shift_v4i64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpsrad $7, %ymm0, %ymm1
-; AVX2-NEXT: vpsrlq $7, %ymm0, %ymm0
-; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
-; AVX2-NEXT: retq
+; AVX-LABEL: splatconstant_shift_v4i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsrad $7, %ymm0, %ymm1
+; AVX-NEXT: vpsrlq $7, %ymm0, %ymm0
+; AVX-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_shift_v4i64:
; XOPAVX1: # %bb.0:
@@ -1846,10 +1883,10 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: splatconstant_shift_v8i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpsrad $5, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: splatconstant_shift_v8i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsrad $5, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_shift_v8i32:
; XOPAVX1: # %bb.0:
@@ -1899,10 +1936,10 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: splatconstant_shift_v16i16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpsraw $3, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: splatconstant_shift_v16i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsraw $3, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_shift_v16i16:
; XOPAVX1: # %bb.0:
@@ -1960,14 +1997,14 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: splatconstant_shift_v32i8:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpsrlw $3, %ymm0, %ymm0
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: vpbroadcastb {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpsubb %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: splatconstant_shift_v32i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsrlw $3, %ymm0, %ymm0
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX-NEXT: vpbroadcastb {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vpsubb %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_shift_v32i8:
; XOPAVX1: # %bb.0:
@@ -2057,12 +2094,12 @@ define <4 x i64> @shift32_v4i64(<4 x i64> %a) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shift32_v4i64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpsrad $31, %ymm0, %ymm1
-; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
-; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
-; AVX2-NEXT: retq
+; AVX-LABEL: shift32_v4i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsrad $31, %ymm0, %ymm1
+; AVX-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
+; AVX-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: shift32_v4i64:
; XOPAVX1: # %bb.0:
@@ -2131,15 +2168,27 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: PR52719:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vmovd %edi, %xmm1
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
-; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX2-SLOW-LABEL: PR52719:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vmovd %edi, %xmm1
+; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; AVX2-SLOW-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
+; AVX2-SLOW-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX2-SLOW-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; AVX2-SLOW-NEXT: vpsubq %ymm2, %ymm0, %ymm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: PR52719:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vmovd %edi, %xmm1
+; AVX2-FAST-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; AVX2-FAST-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
+; AVX2-FAST-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; AVX2-FAST-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; AVX2-FAST-NEXT: vpsubq %ymm2, %ymm0, %ymm0
+; AVX2-FAST-NEXT: retq
;
; XOPAVX1-LABEL: PR52719:
; XOPAVX1: # %bb.0:
@@ -2211,3 +2260,74 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
%ashr = ashr <4 x i64> %a0, %zext
ret <4 x i64> %ashr
}
+
+
+define <8 x i32> @shift_splat_vec8i32(<8 x i32> %x, i32 %s) {
+; AVX1-LABEL: shift_splat_vec8i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovd %edi, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: shift_splat_vec8i32:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vmovd %edi, %xmm1
+; AVX2-SLOW-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shift_splat_vec8i32:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vmovd %edi, %xmm1
+; AVX2-FAST-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX2-FAST-NEXT: vpsravd %ymm1, %ymm0, %ymm0
+; AVX2-FAST-NEXT: retq
+;
+; XOPAVX1-LABEL: shift_splat_vec8i32:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vmovd %edi, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: shift_splat_vec8i32:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vmovd %edi, %xmm1
+; XOPAVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: shift_splat_vec8i32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmovd %edi, %xmm1
+; AVX512-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: shift_splat_vec8i32:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vmovd %edi, %xmm1
+; AVX512VL-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+;
+; X86-AVX1-LABEL: shift_splat_vec8i32:
+; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; X86-AVX1-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vpsrad %xmm2, %xmm1, %xmm1
+; X86-AVX1-NEXT: vpsrad %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT: retl
+;
+; X86-AVX2-LABEL: shift_splat_vec8i32:
+; X86-AVX2: # %bb.0:
+; X86-AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: retl
+ %vec = insertelement <8 x i32> poison, i32 %s, i64 0
+ %splat = shufflevector <8 x i32> %vec, <8 x i32> poison, <8 x i32> zeroinitializer
+ %shr = ashr <8 x i32> %x, %splat
+ ret <8 x i32> %shr
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
index ca8343cd4812cdd..d6bee46e1a897a2 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
@@ -2,7 +2,8 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+tuning-fast-per-element-vector-shift | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOP,XOPAVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOP,XOPAVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ
@@ -520,10 +521,21 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE-NEXT: psrlq %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: splatvar_shift_v2i64:
-; AVX: # %bb.0:
-; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_shift_v2i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: splatvar_shift_v2i64:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: splatvar_shift_v2i64:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; AVX2-FAST-NEXT: retq
;
; XOP-LABEL: splatvar_shift_v2i64:
; XOP: # %bb.0:
@@ -563,11 +575,23 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE41-NEXT: psrld %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: splatvar_shift_v4i32:
-; AVX: # %bb.0:
-; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX-NEXT: vpsrld %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_shift_v4i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: splatvar_shift_v4i32:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX2-SLOW-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: splatvar_shift_v4i32:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
+; AVX2-FAST-NEXT: retq
;
; XOP-LABEL: splatvar_shift_v4i32:
; XOP: # %bb.0:
@@ -775,11 +799,24 @@ define <2 x i64> @splatvar_modulo_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwi
; SSE-NEXT: psrlq %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: splatvar_modulo_shift_v2i64:
-; AVX: # %bb.0:
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_modulo_shift_v2i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: splatvar_modulo_shift_v2i64:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-SLOW-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: splatvar_modulo_shift_v2i64:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; AVX2-FAST-NEXT: retq
;
; XOP-LABEL: splatvar_modulo_shift_v2i64:
; XOP: # %bb.0:
@@ -817,11 +854,25 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
; SSE-NEXT: psrld %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: splatvar_modulo_shift_v4i32:
-; AVX: # %bb.0:
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-NEXT: vpsrld %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_modulo_shift_v4i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: splatvar_modulo_shift_v4i32:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-SLOW-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: splatvar_modulo_shift_v4i32:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX2-FAST-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
+; AVX2-FAST-NEXT: retq
;
; XOP-LABEL: splatvar_modulo_shift_v4i32:
; XOP: # %bb.0:
@@ -1611,3 +1662,115 @@ define <4 x i32> @vector_variable_shift_right(<4 x i1> %cond, <4 x i32> %x, <4 x
%sh = lshr <4 x i32> %z, %sel
ret <4 x i32> %sh
}
+
+define <4 x i32> @shift_splat_vec4i32(<4 x i32> %x, i32 %s) {
+; SSE-LABEL: shift_splat_vec4i32:
+; SSE: # %bb.0:
+; SSE-NEXT: movd %edi, %xmm1
+; SSE-NEXT: psrld %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: shift_splat_vec4i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovd %edi, %xmm1
+; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: shift_splat_vec4i32:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vmovd %edi, %xmm1
+; AVX2-SLOW-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shift_splat_vec4i32:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vmovd %edi, %xmm1
+; AVX2-FAST-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
+; AVX2-FAST-NEXT: retq
+;
+; XOP-LABEL: shift_splat_vec4i32:
+; XOP: # %bb.0:
+; XOP-NEXT: vmovd %edi, %xmm1
+; XOP-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: shift_splat_vec4i32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmovd %edi, %xmm1
+; AVX512-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: shift_splat_vec4i32:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vmovd %edi, %xmm1
+; AVX512VL-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
+;
+; X86-SSE-LABEL: shift_splat_vec4i32:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-SSE-NEXT: psrld %xmm1, %xmm0
+; X86-SSE-NEXT: retl
+ %vec = insertelement <4 x i32> poison, i32 %s, i64 0
+ %splat = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> zeroinitializer
+ %shr = lshr <4 x i32> %x, %splat
+ ret <4 x i32> %shr
+}
+
+define <2 x i64> @shift_splat_zext_vec2i64(<2 x i64> %x, i32 %s) {
+; SSE-LABEL: shift_splat_zext_vec2i64:
+; SSE: # %bb.0:
+; SSE-NEXT: movd %edi, %xmm1
+; SSE-NEXT: psrlq %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: shift_splat_zext_vec2i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovd %edi, %xmm1
+; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: shift_splat_zext_vec2i64:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vmovd %edi, %xmm1
+; AVX2-SLOW-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shift_splat_zext_vec2i64:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vmovd %edi, %xmm1
+; AVX2-FAST-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX2-FAST-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; AVX2-FAST-NEXT: retq
+;
+; XOP-LABEL: shift_splat_zext_vec2i64:
+; XOP: # %bb.0:
+; XOP-NEXT: vmovd %edi, %xmm1
+; XOP-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: shift_splat_zext_vec2i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmovd %edi, %xmm1
+; AVX512-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: shift_splat_zext_vec2i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vmovd %edi, %xmm1
+; AVX512VL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
+;
+; X86-SSE-LABEL: shift_splat_zext_vec2i64:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-SSE-NEXT: psrlq %xmm1, %xmm0
+; X86-SSE-NEXT: retl
+ %vec = insertelement <2 x i32> poison, i32 %s, i64 0
+ %splat = shufflevector <2 x i32> %vec, <2 x i32> poison, <2 x i32> zeroinitializer
+ %zext = zext <2 x i32> %splat to <2 x i64>
+ %shr = lshr <2 x i64> %x, %zext
+ ret <2 x i64> %shr
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
index e65f78e49dc8d27..e3dcd93c65556a3 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+tuning-fast-per-element-vector-shift | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=XOPAVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=XOPAVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ
@@ -32,10 +33,10 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: var_shift_v4i64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: var_shift_v4i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: var_shift_v4i64:
; XOPAVX1: # %bb.0:
@@ -118,10 +119,10 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: var_shift_v8i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: var_shift_v8i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: var_shift_v8i32:
; XOPAVX1: # %bb.0:
@@ -224,19 +225,19 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: var_shift_v16i16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
-; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
-; AVX2-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
-; AVX2-NEXT: vpsrld $16, %ymm3, %ymm3
-; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
-; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
-; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
-; AVX2-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: var_shift_v16i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
+; AVX-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
+; AVX-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
+; AVX-NEXT: vpsrld $16, %ymm3, %ymm3
+; AVX-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
+; AVX-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
+; AVX-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vpsrld $16, %ymm0, %ymm0
+; AVX-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: var_shift_v16i16:
; XOPAVX1: # %bb.0:
@@ -378,21 +379,21 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: var_shift_v32i8:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
-; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm2
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
-; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpsrlw $2, %ymm0, %ymm2
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
-; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpsrlw $1, %ymm0, %ymm2
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
-; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: var_shift_v32i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsllw $5, %ymm1, %ymm1
+; AVX-NEXT: vpsrlw $4, %ymm0, %ymm2
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
+; AVX-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vpsrlw $2, %ymm0, %ymm2
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
+; AVX-NEXT: vpaddb %ymm1, %ymm1, %ymm1
+; AVX-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vpsrlw $1, %ymm0, %ymm2
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
+; AVX-NEXT: vpaddb %ymm1, %ymm1, %ymm1
+; AVX-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: var_shift_v32i8:
; XOPAVX1: # %bb.0:
@@ -532,10 +533,16 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: splatvar_shift_v4i64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX2-SLOW-LABEL: splatvar_shift_v4i64:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: splatvar_shift_v4i64:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX2-FAST-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; AVX2-FAST-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v4i64:
; XOPAVX1: # %bb.0:
@@ -587,11 +594,17 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: splatvar_shift_v8i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX2-SLOW-LABEL: splatvar_shift_v8i32:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX2-SLOW-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: splatvar_shift_v8i32:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX2-FAST-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
+; AVX2-FAST-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v8i32:
; XOPAVX1: # %bb.0:
@@ -649,11 +662,11 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: splatvar_shift_v16i16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: splatvar_shift_v16i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v16i16:
; XOPAVX1: # %bb.0:
@@ -716,16 +729,16 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: splatvar_shift_v32i8:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
-; AVX2-NEXT: vpsrlw $8, %xmm1, %xmm1
-; AVX2-NEXT: vpbroadcastb %xmm1, %ymm1
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: splatvar_shift_v32i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
+; AVX-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v32i8:
; XOPAVX1: # %bb.0:
@@ -830,11 +843,19 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: splatvar_modulo_shift_v4i64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX2-SLOW-LABEL: splatvar_modulo_shift_v4i64:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-SLOW-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: splatvar_modulo_shift_v4i64:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; AVX2-FAST-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX2-FAST-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; AVX2-FAST-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v4i64:
; XOPAVX1: # %bb.0:
@@ -893,11 +914,19 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: splatvar_modulo_shift_v8i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX2-SLOW-LABEL: splatvar_modulo_shift_v8i32:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-SLOW-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: splatvar_modulo_shift_v8i32:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX2-FAST-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX2-FAST-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
+; AVX2-FAST-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v8i32:
; XOPAVX1: # %bb.0:
@@ -956,11 +985,11 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: splatvar_modulo_shift_v16i16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: splatvar_modulo_shift_v16i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v16i16:
; XOPAVX1: # %bb.0:
@@ -1024,16 +1053,16 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: splatvar_modulo_shift_v32i8:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
-; AVX2-NEXT: vpsrlw $8, %xmm1, %xmm1
-; AVX2-NEXT: vpbroadcastb %xmm1, %ymm1
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: splatvar_modulo_shift_v32i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
+; AVX-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v32i8:
; XOPAVX1: # %bb.0:
@@ -1146,10 +1175,10 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: constant_shift_v4i64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: constant_shift_v4i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: constant_shift_v4i64:
; XOPAVX1: # %bb.0:
@@ -1213,10 +1242,10 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: constant_shift_v8i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: constant_shift_v8i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: constant_shift_v8i32:
; XOPAVX1: # %bb.0:
@@ -1277,12 +1306,12 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: constant_shift_v16i16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
-; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; AVX2-NEXT: retq
+; AVX-LABEL: constant_shift_v16i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
+; AVX-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: constant_shift_v16i16:
; XOPAVX1: # %bb.0:
@@ -1369,17 +1398,17 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: constant_shift_v32i8:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
-; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
-; AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2
-; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
-; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
-; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: constant_shift_v32i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
+; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
+; AVX-NEXT: vpsrlw $8, %ymm2, %ymm2
+; AVX-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
+; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX-NEXT: vpsrlw $8, %ymm0, %ymm0
+; AVX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: constant_shift_v32i8:
; XOPAVX1: # %bb.0:
@@ -1488,10 +1517,10 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: splatconstant_shift_v4i64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpsrlq $7, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: splatconstant_shift_v4i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsrlq $7, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_shift_v4i64:
; XOPAVX1: # %bb.0:
@@ -1541,10 +1570,10 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: splatconstant_shift_v8i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpsrld $5, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: splatconstant_shift_v8i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsrld $5, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_shift_v8i32:
; XOPAVX1: # %bb.0:
@@ -1594,10 +1623,10 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: splatconstant_shift_v16i16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpsrlw $3, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: splatconstant_shift_v16i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsrlw $3, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_shift_v16i16:
; XOPAVX1: # %bb.0:
@@ -1650,11 +1679,11 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: splatconstant_shift_v32i8:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpsrlw $3, %ymm0, %ymm0
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: splatconstant_shift_v32i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsrlw $3, %ymm0, %ymm0
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_shift_v32i8:
; XOPAVX1: # %bb.0:
@@ -1715,10 +1744,10 @@ define <4 x i64> @shift32_v4i64(<4 x i64> %a) nounwind {
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shift32_v4i64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: shift32_v4i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsrlq $32, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: shift32_v4i64:
; XOPAVX1: # %bb.0:
@@ -1768,15 +1797,15 @@ define <4 x i32> @sh_trunc_sh_vec(<4 x i64> %x) {
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
-; AVX2-LABEL: sh_trunc_sh_vec:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpsrlq $36, %ymm0, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
-; AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [1048575,1048575,1048575,1048575]
-; AVX2-NEXT: vandps %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
+; AVX-LABEL: sh_trunc_sh_vec:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsrlq $36, %ymm0, %ymm0
+; AVX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [1048575,1048575,1048575,1048575]
+; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
;
; XOPAVX1-LABEL: sh_trunc_sh_vec:
; XOPAVX1: # %bb.0:
@@ -1839,3 +1868,145 @@ define <4 x i32> @sh_trunc_sh_vec(<4 x i64> %x) {
%r = lshr <4 x i32> %t, <i32 12, i32 12, i32 12, i32 12>
ret <4 x i32> %r
}
+
+define <8 x i32> @shift_splat_vec8i32(<8 x i32> %x, i32 %s) {
+; AVX1-LABEL: shift_splat_vec8i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovd %edi, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: shift_splat_vec8i32:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vmovd %edi, %xmm1
+; AVX2-SLOW-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shift_splat_vec8i32:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vmovd %edi, %xmm1
+; AVX2-FAST-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX2-FAST-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
+; AVX2-FAST-NEXT: retq
+;
+; XOPAVX1-LABEL: shift_splat_vec8i32:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vmovd %edi, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: shift_splat_vec8i32:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vmovd %edi, %xmm1
+; XOPAVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: shift_splat_vec8i32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmovd %edi, %xmm1
+; AVX512-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: shift_splat_vec8i32:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vmovd %edi, %xmm1
+; AVX512VL-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+;
+; X86-AVX1-LABEL: shift_splat_vec8i32:
+; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; X86-AVX1-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vpsrld %xmm2, %xmm1, %xmm1
+; X86-AVX1-NEXT: vpsrld %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT: retl
+;
+; X86-AVX2-LABEL: shift_splat_vec8i32:
+; X86-AVX2: # %bb.0:
+; X86-AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: retl
+ %vec = insertelement <8 x i32> poison, i32 %s, i64 0
+ %splat = shufflevector <8 x i32> %vec, <8 x i32> poison, <8 x i32> zeroinitializer
+ %shr = lshr <8 x i32> %x, %splat
+ ret <8 x i32> %shr
+}
+
+define <4 x i64> @shift_splat_zext_vec4i64(<4 x i64> %x, i32 %s) {
+; AVX1-LABEL: shift_splat_zext_vec4i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovd %edi, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: shift_splat_zext_vec4i64:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vmovd %edi, %xmm1
+; AVX2-SLOW-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shift_splat_zext_vec4i64:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vmovd %edi, %xmm1
+; AVX2-FAST-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; AVX2-FAST-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; AVX2-FAST-NEXT: retq
+;
+; XOPAVX1-LABEL: shift_splat_zext_vec4i64:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vmovd %edi, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: shift_splat_zext_vec4i64:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vmovd %edi, %xmm1
+; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: shift_splat_zext_vec4i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmovd %edi, %xmm1
+; AVX512-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: shift_splat_zext_vec4i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vmovd %edi, %xmm1
+; AVX512VL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+;
+; X86-AVX1-LABEL: shift_splat_zext_vec4i64:
+; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; X86-AVX1-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vpsrlq %xmm2, %xmm1, %xmm1
+; X86-AVX1-NEXT: vpsrlq %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT: retl
+;
+; X86-AVX2-LABEL: shift_splat_zext_vec4i64:
+; X86-AVX2: # %bb.0:
+; X86-AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: retl
+ %vec = insertelement <4 x i32> poison, i32 %s, i64 0
+ %splat = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> zeroinitializer
+ %zext = zext <4 x i32> %splat to <4 x i64>
+ %shr = lshr <4 x i64> %x, %zext
+ ret <4 x i64> %shr
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
index c54da38ef10cc18..7cc35eb9a3d25d3 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
@@ -2,7 +2,8 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+tuning-fast-per-element-vector-shift | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOP,XOPAVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOP,XOPAVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ
@@ -431,10 +432,21 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE-NEXT: psllq %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: splatvar_shift_v2i64:
-; AVX: # %bb.0:
-; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_shift_v2i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: splatvar_shift_v2i64:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: splatvar_shift_v2i64:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
+; AVX2-FAST-NEXT: retq
;
; XOP-LABEL: splatvar_shift_v2i64:
; XOP: # %bb.0:
@@ -474,11 +486,23 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE41-NEXT: pslld %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: splatvar_shift_v4i32:
-; AVX: # %bb.0:
-; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX-NEXT: vpslld %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_shift_v4i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: splatvar_shift_v4i32:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX2-SLOW-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: splatvar_shift_v4i32:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; AVX2-FAST-NEXT: retq
;
; XOP-LABEL: splatvar_shift_v4i32:
; XOP: # %bb.0:
@@ -682,11 +706,24 @@ define <2 x i64> @splatvar_modulo_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwi
; SSE-NEXT: psllq %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: splatvar_modulo_shift_v2i64:
-; AVX: # %bb.0:
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_modulo_shift_v2i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: splatvar_modulo_shift_v2i64:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-SLOW-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: splatvar_modulo_shift_v2i64:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
+; AVX2-FAST-NEXT: retq
;
; XOP-LABEL: splatvar_modulo_shift_v2i64:
; XOP: # %bb.0:
@@ -724,11 +761,25 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
; SSE-NEXT: pslld %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: splatvar_modulo_shift_v4i32:
-; AVX: # %bb.0:
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-NEXT: vpslld %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_modulo_shift_v4i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: splatvar_modulo_shift_v4i32:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-SLOW-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: splatvar_modulo_shift_v4i32:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX2-FAST-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; AVX2-FAST-NEXT: retq
;
; XOP-LABEL: splatvar_modulo_shift_v4i32:
; XOP: # %bb.0:
@@ -1338,3 +1389,115 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) nounwind {
%shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <16 x i8> %shift
}
+
+define <4 x i32> @shift_splat_vec4i32(<4 x i32> %x, i32 %s) {
+; SSE-LABEL: shift_splat_vec4i32:
+; SSE: # %bb.0:
+; SSE-NEXT: movd %edi, %xmm1
+; SSE-NEXT: pslld %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: shift_splat_vec4i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovd %edi, %xmm1
+; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: shift_splat_vec4i32:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vmovd %edi, %xmm1
+; AVX2-SLOW-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shift_splat_vec4i32:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vmovd %edi, %xmm1
+; AVX2-FAST-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; AVX2-FAST-NEXT: retq
+;
+; XOP-LABEL: shift_splat_vec4i32:
+; XOP: # %bb.0:
+; XOP-NEXT: vmovd %edi, %xmm1
+; XOP-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: shift_splat_vec4i32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmovd %edi, %xmm1
+; AVX512-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: shift_splat_vec4i32:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vmovd %edi, %xmm1
+; AVX512VL-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
+;
+; X86-SSE-LABEL: shift_splat_vec4i32:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-SSE-NEXT: pslld %xmm1, %xmm0
+; X86-SSE-NEXT: retl
+ %vec = insertelement <4 x i32> poison, i32 %s, i64 0
+ %splat = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> zeroinitializer
+ %shr = shl <4 x i32> %x, %splat
+ ret <4 x i32> %shr
+}
+
+define <2 x i64> @shift_splat_zext_vec2i64(<2 x i64> %x, i32 %s) {
+; SSE-LABEL: shift_splat_zext_vec2i64:
+; SSE: # %bb.0:
+; SSE-NEXT: movd %edi, %xmm1
+; SSE-NEXT: psllq %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: shift_splat_zext_vec2i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovd %edi, %xmm1
+; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: shift_splat_zext_vec2i64:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vmovd %edi, %xmm1
+; AVX2-SLOW-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shift_splat_zext_vec2i64:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vmovd %edi, %xmm1
+; AVX2-FAST-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX2-FAST-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
+; AVX2-FAST-NEXT: retq
+;
+; XOP-LABEL: shift_splat_zext_vec2i64:
+; XOP: # %bb.0:
+; XOP-NEXT: vmovd %edi, %xmm1
+; XOP-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: shift_splat_zext_vec2i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmovd %edi, %xmm1
+; AVX512-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: shift_splat_zext_vec2i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vmovd %edi, %xmm1
+; AVX512VL-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
+;
+; X86-SSE-LABEL: shift_splat_zext_vec2i64:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-SSE-NEXT: psllq %xmm1, %xmm0
+; X86-SSE-NEXT: retl
+ %vec = insertelement <2 x i32> poison, i32 %s, i64 0
+ %splat = shufflevector <2 x i32> %vec, <2 x i32> poison, <2 x i32> zeroinitializer
+ %zext = zext <2 x i32> %splat to <2 x i64>
+ %shr = shl <2 x i64> %x, %zext
+ ret <2 x i64> %shr
+}
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
index 6dde209e94d8114..f19d621c43d904c 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
@@ -2,7 +2,8 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=XOPAVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=XOPAVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+tuning-fast-per-element-vector-shift | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX512VL,AVX512DQVL
@@ -46,10 +47,10 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
-; XOPAVX2-LABEL: var_shift_v4i64:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
-; XOPAVX2-NEXT: retq
+; AVX-LABEL: var_shift_v4i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; AVX512-LABEL: var_shift_v4i64:
; AVX512: # %bb.0:
@@ -80,6 +81,10 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
+; XOPAVX2-LABEL: var_shift_v4i64:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
%shift = shl <4 x i64> %a, %b
ret <4 x i64> %shift
}
@@ -115,10 +120,10 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
-; XOPAVX2-LABEL: var_shift_v8i32:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
-; XOPAVX2-NEXT: retq
+; AVX-LABEL: var_shift_v8i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; AVX512-LABEL: var_shift_v8i32:
; AVX512: # %bb.0:
@@ -150,6 +155,10 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
+; XOPAVX2-LABEL: var_shift_v8i32:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
%shift = shl <8 x i32> %a, %b
ret <8 x i32> %shift
}
@@ -206,14 +215,19 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
-; XOPAVX2-LABEL: var_shift_v16i16:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
-; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
-; XOPAVX2-NEXT: vpshlw %xmm2, %xmm3, %xmm2
-; XOPAVX2-NEXT: vpshlw %xmm1, %xmm0, %xmm0
-; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
-; XOPAVX2-NEXT: retq
+; AVX-LABEL: var_shift_v16i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
+; AVX-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
+; AVX-NEXT: vpsllvd %ymm3, %ymm4, %ymm3
+; AVX-NEXT: vpsrld $16, %ymm3, %ymm3
+; AVX-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
+; AVX-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
+; AVX-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vpsrld $16, %ymm0, %ymm0
+; AVX-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; AVX512DQ-LABEL: var_shift_v16i16:
; AVX512DQ: # %bb.0:
@@ -285,6 +299,14 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; X86-AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
; X86-AVX2-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
+; XOPAVX2-LABEL: var_shift_v16i16:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
+; XOPAVX2-NEXT: vpshlw %xmm2, %xmm3, %xmm2
+; XOPAVX2-NEXT: vpshlw %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
%shift = shl <16 x i16> %a, %b
ret <16 x i16> %shift
}
@@ -345,14 +367,20 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
-; XOPAVX2-LABEL: var_shift_v32i8:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
-; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
-; XOPAVX2-NEXT: vpshlb %xmm2, %xmm3, %xmm2
-; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0
-; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
-; XOPAVX2-NEXT: retq
+; AVX-LABEL: var_shift_v32i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsllw $5, %ymm1, %ymm1
+; AVX-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
+; AVX-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vpsllw $2, %ymm0, %ymm2
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
+; AVX-NEXT: vpaddb %ymm1, %ymm1, %ymm1
+; AVX-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vpaddb %ymm0, %ymm0, %ymm2
+; AVX-NEXT: vpaddb %ymm1, %ymm1, %ymm1
+; AVX-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; AVX512DQ-LABEL: var_shift_v32i8:
; AVX512DQ: # %bb.0:
@@ -445,6 +473,14 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; X86-AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; X86-AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
+; XOPAVX2-LABEL: var_shift_v32i8:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
+; XOPAVX2-NEXT: vpshlb %xmm2, %xmm3, %xmm2
+; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
%shift = shl <32 x i8> %a, %b
ret <32 x i8> %shift
}
@@ -462,11 +498,6 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: splatvar_shift_v4i64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
; XOPAVX1-LABEL: splatvar_shift_v4i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
@@ -475,10 +506,16 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
-; XOPAVX2-LABEL: splatvar_shift_v4i64:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
-; XOPAVX2-NEXT: retq
+; AVX2-SLOW-LABEL: splatvar_shift_v4i64:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: splatvar_shift_v4i64:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX2-FAST-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
+; AVX2-FAST-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v4i64:
; AVX512: # %bb.0:
@@ -502,6 +539,10 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
+; XOPAVX2-LABEL: splatvar_shift_v4i64:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
%splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
%shift = shl <4 x i64> %a, %splat
ret <4 x i64> %shift
@@ -517,12 +558,6 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: splatvar_shift_v8i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
; XOPAVX1-LABEL: splatvar_shift_v8i32:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
@@ -532,11 +567,17 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
-; XOPAVX2-LABEL: splatvar_shift_v8i32:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; XOPAVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
-; XOPAVX2-NEXT: retq
+; AVX2-SLOW-LABEL: splatvar_shift_v8i32:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX2-SLOW-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: splatvar_shift_v8i32:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX2-FAST-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
+; AVX2-FAST-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v8i32:
; AVX512: # %bb.0:
@@ -564,6 +605,11 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
+; XOPAVX2-LABEL: splatvar_shift_v8i32:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
%splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
%shift = shl <8 x i32> %a, %splat
ret <8 x i32> %shift
@@ -594,11 +640,11 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
-; XOPAVX2-LABEL: splatvar_shift_v16i16:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; XOPAVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; XOPAVX2-NEXT: retq
+; AVX-LABEL: splatvar_shift_v16i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v16i16:
; AVX512: # %bb.0:
@@ -626,6 +672,11 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
; X86-AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
+; XOPAVX2-LABEL: splatvar_shift_v16i16:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; XOPAVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
%splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
%shift = shl <16 x i16> %a, %splat
ret <16 x i16> %shift
@@ -667,14 +718,15 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
-; XOPAVX2-LABEL: splatvar_shift_v32i8:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
-; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1
-; XOPAVX2-NEXT: vpshlb %xmm1, %xmm2, %xmm2
-; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0
-; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
-; XOPAVX2-NEXT: retq
+; AVX-LABEL: splatvar_shift_v32i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpsllw %xmm1, %xmm2, %xmm1
+; AVX-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; AVX512DQ-LABEL: splatvar_shift_v32i8:
; AVX512DQ: # %bb.0:
@@ -736,6 +788,14 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; X86-AVX2-NEXT: vpbroadcastb %xmm1, %ymm1
; X86-AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
+; XOPAVX2-LABEL: splatvar_shift_v32i8:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1
+; XOPAVX2-NEXT: vpshlb %xmm1, %xmm2, %xmm2
+; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
%splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
%shift = shl <32 x i8> %a, %splat
ret <32 x i8> %shift
@@ -755,12 +815,6 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: splatvar_modulo_shift_v4i64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
; XOPAVX1-LABEL: splatvar_modulo_shift_v4i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
@@ -770,11 +824,19 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
-; XOPAVX2-LABEL: splatvar_modulo_shift_v4i64:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
-; XOPAVX2-NEXT: retq
+; AVX2-SLOW-LABEL: splatvar_modulo_shift_v4i64:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-SLOW-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: splatvar_modulo_shift_v4i64:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; AVX2-FAST-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX2-FAST-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
+; AVX2-FAST-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v4i64:
; AVX512: # %bb.0:
@@ -802,6 +864,11 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
+; XOPAVX2-LABEL: splatvar_modulo_shift_v4i64:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
%mod = and <4 x i64> %b, <i64 63, i64 63, i64 63, i64 63>
%splat = shufflevector <4 x i64> %mod, <4 x i64> undef, <4 x i32> zeroinitializer
%shift = shl <4 x i64> %a, %splat
@@ -818,12 +885,6 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: splatvar_modulo_shift_v8i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
; XOPAVX1-LABEL: splatvar_modulo_shift_v8i32:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
@@ -833,11 +894,19 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
-; XOPAVX2-LABEL: splatvar_modulo_shift_v8i32:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
-; XOPAVX2-NEXT: retq
+; AVX2-SLOW-LABEL: splatvar_modulo_shift_v8i32:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-SLOW-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: splatvar_modulo_shift_v8i32:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX2-FAST-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX2-FAST-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
+; AVX2-FAST-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v8i32:
; AVX512: # %bb.0:
@@ -865,6 +934,11 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
+; XOPAVX2-LABEL: splatvar_modulo_shift_v8i32:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
%mod = and <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
%splat = shufflevector <8 x i32> %mod, <8 x i32> undef, <8 x i32> zeroinitializer
%shift = shl <8 x i32> %a, %splat
@@ -896,11 +970,11 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
-; XOPAVX2-LABEL: splatvar_modulo_shift_v16i16:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; XOPAVX2-NEXT: retq
+; AVX-LABEL: splatvar_modulo_shift_v16i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v16i16:
; AVX512: # %bb.0:
@@ -928,6 +1002,11 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
+; XOPAVX2-LABEL: splatvar_modulo_shift_v16i16:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
%mod = and <16 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%splat = shufflevector <16 x i16> %mod, <16 x i16> undef, <16 x i32> zeroinitializer
%shift = shl <16 x i16> %a, %splat
@@ -971,15 +1050,15 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
-; XOPAVX2-LABEL: splatvar_modulo_shift_v32i8:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
-; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1
-; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX2-NEXT: vpshlb %xmm1, %xmm2, %xmm2
-; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0
-; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
-; XOPAVX2-NEXT: retq
+; AVX-LABEL: splatvar_modulo_shift_v32i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpsllw %xmm1, %xmm2, %xmm1
+; AVX-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; AVX512DQ-LABEL: splatvar_modulo_shift_v32i8:
; AVX512DQ: # %bb.0:
@@ -1043,6 +1122,15 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
; X86-AVX2-NEXT: vpbroadcastb %xmm1, %ymm1
; X86-AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
+; XOPAVX2-LABEL: splatvar_modulo_shift_v32i8:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1
+; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT: vpshlb %xmm1, %xmm2, %xmm2
+; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
%mod = and <32 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
%splat = shufflevector <32 x i8> %mod, <32 x i8> undef, <32 x i32> zeroinitializer
%shift = shl <32 x i8> %a, %splat
@@ -1079,10 +1167,10 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) nounwind {
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT: retq
;
-; XOPAVX2-LABEL: constant_shift_v4i64:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; XOPAVX2-NEXT: retq
+; AVX-LABEL: constant_shift_v4i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX-NEXT: retq
;
; AVX512-LABEL: constant_shift_v4i64:
; AVX512: # %bb.0:
@@ -1110,6 +1198,10 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) nounwind {
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
+; XOPAVX2-LABEL: constant_shift_v4i64:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
%shift = shl <4 x i64> %a, <i64 1, i64 7, i64 31, i64 62>
ret <4 x i64> %shift
}
@@ -1136,10 +1228,10 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind {
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT: retq
;
-; XOPAVX2-LABEL: constant_shift_v8i32:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; XOPAVX2-NEXT: retq
+; AVX-LABEL: constant_shift_v8i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX-NEXT: retq
;
; AVX512-LABEL: constant_shift_v8i32:
; AVX512: # %bb.0:
@@ -1163,6 +1255,10 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind {
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
+; XOPAVX2-LABEL: constant_shift_v8i32:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
%shift = shl <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
ret <8 x i32> %shift
}
@@ -1189,10 +1285,10 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT: retq
;
-; XOPAVX2-LABEL: constant_shift_v16i16:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; XOPAVX2-NEXT: retq
+; AVX-LABEL: constant_shift_v16i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX-NEXT: retq
;
; AVX512DQ-LABEL: constant_shift_v16i16:
; AVX512DQ: # %bb.0:
@@ -1229,6 +1325,10 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
+; XOPAVX2-LABEL: constant_shift_v16i16:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
%shift = shl <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
ret <16 x i16> %shift
}
@@ -1278,14 +1378,17 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
-; XOPAVX2-LABEL: constant_shift_v32i8:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
-; XOPAVX2-NEXT: vpshlb %xmm2, %xmm1, %xmm1
-; XOPAVX2-NEXT: vpshlb %xmm2, %xmm0, %xmm0
-; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; XOPAVX2-NEXT: retq
+; AVX-LABEL: constant_shift_v32i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
+; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; AVX512DQ-LABEL: constant_shift_v32i8:
; AVX512DQ: # %bb.0:
@@ -1359,6 +1462,14 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
; X86-AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
+; XOPAVX2-LABEL: constant_shift_v32i8:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; XOPAVX2-NEXT: vpshlb %xmm2, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpshlb %xmm2, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
%shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
ret <32 x i8> %shift
}
@@ -1389,10 +1500,10 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) nounwind {
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT: retq
;
-; XOPAVX2-LABEL: splatconstant_shift_v4i64:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpsllq $7, %ymm0, %ymm0
-; XOPAVX2-NEXT: retq
+; AVX-LABEL: splatconstant_shift_v4i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsllq $7, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; AVX512-LABEL: splatconstant_shift_v4i64:
; AVX512: # %bb.0:
@@ -1416,6 +1527,10 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) nounwind {
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpsllq $7, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
+; XOPAVX2-LABEL: splatconstant_shift_v4i64:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpsllq $7, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
%shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
ret <4 x i64> %shift
}
@@ -1442,10 +1557,10 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) nounwind {
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT: retq
;
-; XOPAVX2-LABEL: splatconstant_shift_v8i32:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpslld $5, %ymm0, %ymm0
-; XOPAVX2-NEXT: retq
+; AVX-LABEL: splatconstant_shift_v8i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpslld $5, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; AVX512-LABEL: splatconstant_shift_v8i32:
; AVX512: # %bb.0:
@@ -1469,6 +1584,10 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) nounwind {
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpslld $5, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
+; XOPAVX2-LABEL: splatconstant_shift_v8i32:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpslld $5, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
%shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
ret <8 x i32> %shift
}
@@ -1495,10 +1614,10 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) nounwind {
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT: retq
;
-; XOPAVX2-LABEL: splatconstant_shift_v16i16:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpsllw $3, %ymm0, %ymm0
-; XOPAVX2-NEXT: retq
+; AVX-LABEL: splatconstant_shift_v16i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsllw $3, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; AVX512-LABEL: splatconstant_shift_v16i16:
; AVX512: # %bb.0:
@@ -1522,6 +1641,10 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) nounwind {
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpsllw $3, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
+; XOPAVX2-LABEL: splatconstant_shift_v16i16:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpsllw $3, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
%shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <16 x i16> %shift
}
@@ -1553,11 +1676,11 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
-; XOPAVX2-LABEL: splatconstant_shift_v32i8:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpsllw $3, %ymm0, %ymm0
-; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; XOPAVX2-NEXT: retq
+; AVX-LABEL: splatconstant_shift_v32i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsllw $3, %ymm0, %ymm0
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX-NEXT: retq
;
; AVX512-LABEL: splatconstant_shift_v32i8:
; AVX512: # %bb.0:
@@ -1587,6 +1710,11 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
; X86-AVX2-NEXT: vpsllw $3, %ymm0, %ymm0
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
+; XOPAVX2-LABEL: splatconstant_shift_v32i8:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpsllw $3, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
%shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <32 x i8> %shift
}
@@ -1615,10 +1743,10 @@ define <4 x i64> @shift32_v4i64(<4 x i64> %a) nounwind {
; XOPAVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; XOPAVX1-NEXT: retq
;
-; XOPAVX2-LABEL: shift32_v4i64:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpsllq $32, %ymm0, %ymm0
-; XOPAVX2-NEXT: retq
+; AVX-LABEL: shift32_v4i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsllq $32, %ymm0, %ymm0
+; AVX-NEXT: retq
;
; AVX512-LABEL: shift32_v4i64:
; AVX512: # %bb.0:
@@ -1641,6 +1769,140 @@ define <4 x i64> @shift32_v4i64(<4 x i64> %a) nounwind {
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpsllq $32, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
+; XOPAVX2-LABEL: shift32_v4i64:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpsllq $32, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
%shift = shl <4 x i64> %a, <i64 32, i64 32, i64 32, i64 32>
ret <4 x i64> %shift
}
+
+define <8 x i32> @shift_splat_vec8i32(<8 x i32> %x, i32 %s) {
+; AVX1-LABEL: shift_splat_vec8i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovd %edi, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; XOPAVX1-LABEL: shift_splat_vec8i32:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vmovd %edi, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: shift_splat_vec8i32:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vmovd %edi, %xmm1
+; AVX2-SLOW-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shift_splat_vec8i32:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vmovd %edi, %xmm1
+; AVX2-FAST-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX2-FAST-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
+; AVX2-FAST-NEXT: retq
+;
+; AVX512-LABEL: shift_splat_vec8i32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmovd %edi, %xmm1
+; AVX512-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: shift_splat_vec8i32:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vmovd %edi, %xmm1
+; AVX512VL-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+;
+; X86-AVX1-LABEL: shift_splat_vec8i32:
+; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; X86-AVX1-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vpslld %xmm2, %xmm1, %xmm1
+; X86-AVX1-NEXT: vpslld %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT: retl
+;
+; X86-AVX2-LABEL: shift_splat_vec8i32:
+; X86-AVX2: # %bb.0:
+; X86-AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: retl
+ %vec = insertelement <8 x i32> poison, i32 %s, i64 0
+ %splat = shufflevector <8 x i32> %vec, <8 x i32> poison, <8 x i32> zeroinitializer ; all-zero mask: every lane takes element 0 (%s)
+ %shl = shl <8 x i32> %x, %splat
+ ret <8 x i32> %shl
+}
+
+define <4 x i64> @shift_splat_zext_vec4i64(<4 x i64> %x, i32 %s) {
+; AVX1-LABEL: shift_splat_zext_vec4i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovd %edi, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpsllq %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; XOPAVX1-LABEL: shift_splat_zext_vec4i64:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vmovd %edi, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpsllq %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: shift_splat_zext_vec4i64:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vmovd %edi, %xmm1
+; AVX2-SLOW-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shift_splat_zext_vec4i64:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vmovd %edi, %xmm1
+; AVX2-FAST-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; AVX2-FAST-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
+; AVX2-FAST-NEXT: retq
+;
+; AVX512-LABEL: shift_splat_zext_vec4i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmovd %edi, %xmm1
+; AVX512-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: shift_splat_zext_vec4i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vmovd %edi, %xmm1
+; AVX512VL-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+;
+; X86-AVX1-LABEL: shift_splat_zext_vec4i64:
+; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; X86-AVX1-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vpsllq %xmm2, %xmm1, %xmm1
+; X86-AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT: retl
+;
+; X86-AVX2-LABEL: shift_splat_zext_vec4i64:
+; X86-AVX2: # %bb.0:
+; X86-AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: retl
+ %vec = insertelement <4 x i32> poison, i32 %s, i64 0
+ %splat = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> zeroinitializer ; all-zero mask: every lane takes element 0 (%s)
+ %zext = zext <4 x i32> %splat to <4 x i64> ; widen the i32 splat to the i64 lane width used by the shift
+ %shl = shl <4 x i64> %x, %zext
+ ret <4 x i64> %shl
+}
More information about the llvm-commits
mailing list