[llvm] [X86][AVX] Prefer VPSRAV to VPSRA style shifts for known splats #39424 (PR #87913)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Apr 7 03:31:42 PDT 2024
https://github.com/SahilPatidar created https://github.com/llvm/llvm-project/pull/87913
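Resolves #39424: for vector shifts whose shift amount is a known splat, prefer the per-element variable shift instructions (VPSLLV/VPSRLV/VPSRAV) over the scalar-count forms (VPSLL/VPSRL/VPSRA) on subtargets where per-element vector shifts are fast.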
>From 52e9ab82444f706a50c3422df7339fa49fb4d79c Mon Sep 17 00:00:00 2001
From: SahilPatidar <patidarsahil2001 at gmail.com>
Date: Sat, 6 Apr 2024 15:30:53 +0530
Subject: [PATCH] [X86][AVX] Prefer VPSRAV to VPSRA style shifts for known
splats #39424
---
llvm/lib/Target/X86/X86.td | 7 +-
llvm/lib/Target/X86/X86ISelLowering.cpp | 6 +-
llvm/test/CodeGen/X86/avx2-vector-shifts.ll | 4 +-
llvm/test/CodeGen/X86/vector-fshl-128.ll | 102 ++++---
llvm/test/CodeGen/X86/vector-fshl-256.ll | 89 +++---
llvm/test/CodeGen/X86/vector-fshl-512.ll | 66 +++--
llvm/test/CodeGen/X86/vector-fshl-rot-128.ll | 59 ++--
llvm/test/CodeGen/X86/vector-fshl-rot-256.ll | 40 +--
llvm/test/CodeGen/X86/vector-fshl-rot-512.ll | 22 +-
llvm/test/CodeGen/X86/vector-fshr-128.ll | 100 ++++---
llvm/test/CodeGen/X86/vector-fshr-256.ll | 89 +++---
llvm/test/CodeGen/X86/vector-fshr-512.ll | 66 +++--
llvm/test/CodeGen/X86/vector-fshr-rot-128.ll | 51 ++--
llvm/test/CodeGen/X86/vector-fshr-rot-256.ll | 36 ++-
llvm/test/CodeGen/X86/vector-fshr-rot-512.ll | 22 +-
llvm/test/CodeGen/X86/vector-rotate-128.ll | 50 ++--
llvm/test/CodeGen/X86/vector-rotate-256.ll | 38 +--
llvm/test/CodeGen/X86/vector-rotate-512.ll | 22 +-
.../test/CodeGen/X86/vector-shift-ashr-128.ll | 235 ++++++++++-----
.../test/CodeGen/X86/vector-shift-ashr-256.ll | 222 +++++++++-----
.../test/CodeGen/X86/vector-shift-ashr-512.ll | 30 +-
.../CodeGen/X86/vector-shift-ashr-sub128.ll | 170 +++++++----
.../test/CodeGen/X86/vector-shift-lshr-128.ll | 273 ++++++++++++------
.../test/CodeGen/X86/vector-shift-lshr-256.ll | 182 ++++++++----
.../test/CodeGen/X86/vector-shift-lshr-512.ll | 26 +-
.../CodeGen/X86/vector-shift-lshr-sub128.ll | 170 +++++++----
llvm/test/CodeGen/X86/vector-shift-shl-128.ll | 273 ++++++++++++------
llvm/test/CodeGen/X86/vector-shift-shl-256.ll | 182 ++++++++----
llvm/test/CodeGen/X86/vector-shift-shl-512.ll | 26 +-
.../CodeGen/X86/vector-shift-shl-sub128.ll | 170 +++++++----
llvm/test/CodeGen/X86/vselect-avx.ll | 16 +-
31 files changed, 1826 insertions(+), 1018 deletions(-)
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 78bc043911f2fc..a9f3ae1f847552 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -599,6 +599,10 @@ def TuningPreferShiftShuffle : SubtargetFeature<"faster-shift-than-shuffle",
"PreferLowerShuffleAsShift", "true",
"Shifts are faster (or as fast) as shuffle">;
+def TuningPreferPerEltVectorShift : SubtargetFeature<"tuning-fast-per-element-vector-shift",
+ "PreferPerEltVectorShift", "true",
+ "Vector per element shifts are faster (1/cycle latency)">;
+
def TuningFastImmVectorShift : SubtargetFeature<"tuning-fast-imm-vector-shift",
"FastImmVectorShift", "true",
"Vector shifts are fast (2/cycle) as opposed to slow (1/cycle)">;
@@ -996,7 +1000,8 @@ def ProcessorFeatures {
TuningNoDomainDelayMov,
TuningNoDomainDelayShuffle,
TuningNoDomainDelayBlend,
- TuningFastImmVectorShift];
+ TuningFastImmVectorShift,
+ TuningPreferPerEltVectorShift];
list<SubtargetFeature> SKXFeatures =
!listconcat(BDWFeatures, SKXAdditionalFeatures);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b9a87f9024c7de..2d0bd115d7b030 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -29318,8 +29318,10 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
if (SDValue V = LowerShiftByScalarImmediate(Op, DAG, Subtarget))
return V;
- if (SDValue V = LowerShiftByScalarVariable(Op, DAG, Subtarget))
- return V;
+ if (!supportedVectorVarShift(VT, Subtarget, Opc) &&
+ !Subtarget.preferPerEltVectorShift())
+ if (SDValue V = LowerShiftByScalarVariable(Op, DAG, Subtarget))
+ return V;
if (supportedVectorVarShift(VT, Subtarget, Opc))
return Op;
diff --git a/llvm/test/CodeGen/X86/avx2-vector-shifts.ll b/llvm/test/CodeGen/X86/avx2-vector-shifts.ll
index 983c69d1a1c2e8..6ad8106bba6d4f 100644
--- a/llvm/test/CodeGen/X86/avx2-vector-shifts.ll
+++ b/llvm/test/CodeGen/X86/avx2-vector-shifts.ll
@@ -61,14 +61,14 @@ define <8 x i32> @test_vpslld_var(i32 %shift) {
; X86: # %bb.0:
; X86-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: vpmovzxbd {{.*#+}} ymm1 = [192,193,194,195,196,197,198,199]
-; X86-NEXT: vpslld %xmm0, %ymm1, %ymm0
+; X86-NEXT: vpsllvd %ymm0, %ymm1, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: test_vpslld_var:
; X64: # %bb.0:
; X64-NEXT: vmovd %edi, %xmm0
; X64-NEXT: vpmovzxbd {{.*#+}} ymm1 = [192,193,194,195,196,197,198,199]
-; X64-NEXT: vpslld %xmm0, %ymm1, %ymm0
+; X64-NEXT: vpsllvd %ymm0, %ymm1, %ymm0
; X64-NEXT: retq
%amt = insertelement <8 x i32> undef, i32 %shift, i32 0
%tmp = shl <8 x i32> <i32 192, i32 193, i32 194, i32 195, i32 196, i32 197, i32 198, i32 199>, %amt
diff --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll
index 1addedf3c3d960..577a86dff54e96 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll
@@ -992,47 +992,62 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: splatvar_funnnel_v2i64:
-; AVX: # %bb.0:
-; AVX-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; AVX-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1
-; AVX-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
-; AVX-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX-NEXT: vpsllq %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_funnnel_v2i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
+; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX1-NEXT: vpsrlq $1, %xmm1, %xmm1
+; AVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatvar_funnnel_v2i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq %xmm2, %xmm2
+; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
+; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX2-NEXT: vpsrlq $1, %xmm1, %xmm1
+; AVX2-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
+; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v2i64:
; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX512F-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512F-NEXT: vpsrlq $1, %xmm1, %xmm1
-; AVX512F-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
+; AVX512F-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512F-NEXT: vpsllq %xmm2, %xmm0, %xmm0
+; AVX512F-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v2i64:
; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VL-NEXT: vpsrlq $1, %xmm1, %xmm1
-; AVX512VL-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
+; AVX512VL-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VL-NEXT: vpsllq %xmm2, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v2i64:
; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512BW-NEXT: vpsrlq $1, %xmm1, %xmm1
-; AVX512BW-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
+; AVX512BW-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT: vpsllq %xmm2, %xmm0, %xmm0
+; AVX512BW-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
@@ -1048,12 +1063,13 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v2i64:
; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VLBW-NEXT: vpsrlq $1, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VLBW-NEXT: vpsllq %xmm2, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
@@ -1063,16 +1079,28 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %
; AVX512VLVBMI2-NEXT: vpshldvq %xmm2, %xmm1, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
-; XOP-LABEL: splatvar_funnnel_v2i64:
-; XOP: # %bb.0:
-; XOP-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; XOP-NEXT: vpsrlq $1, %xmm1, %xmm1
-; XOP-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
-; XOP-NEXT: vpand %xmm3, %xmm2, %xmm2
-; XOP-NEXT: vpsllq %xmm2, %xmm0, %xmm0
-; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
-; XOP-NEXT: retq
+; XOPAVX1-LABEL: splatvar_funnnel_v2i64:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
+; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; XOPAVX1-NEXT: vpsrlq $1, %xmm1, %xmm1
+; XOPAVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
+; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0
+; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_funnnel_v2i64:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastq %xmm2, %xmm2
+; XOPAVX2-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
+; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; XOPAVX2-NEXT: vpsrlq $1, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
+; XOPAVX2-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
+; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
;
; X86-SSE2-LABEL: splatvar_funnnel_v2i64:
; X86-SSE2: # %bb.0:
@@ -1255,13 +1283,16 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512BW-LABEL: splatvar_funnnel_v8i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm3 = [15,0]
-; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm2, %xmm2
; AVX512BW-NEXT: vpsrlw $1, %xmm1, %xmm1
-; AVX512BW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT: vpsllw %xmm2, %xmm0, %xmm0
+; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v8i16:
@@ -1276,12 +1307,13 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpmovsxbq {{.*#+}} xmm3 = [15,0]
+; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %xmm2
+; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VLBW-NEXT: vpsrlw $1, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpsrlvw %xmm4, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpsllvw %xmm2, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll
index ebcb1cb15a600e..e11f26e10b0ea6 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll
@@ -778,45 +778,49 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %
;
; AVX2-LABEL: splatvar_funnnel_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX2-NEXT: vpbroadcastq %xmm2, %ymm2
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
+; AVX2-NEXT: vpandn %ymm3, %ymm2, %ymm4
; AVX2-NEXT: vpsrlq $1, %ymm1, %ymm1
-; AVX2-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
-; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX2-NEXT: vpsllq %xmm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
+; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v4i64:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX512F-NEXT: vpbroadcastq %xmm2, %ymm2
+; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
+; AVX512F-NEXT: vpandn %ymm3, %ymm2, %ymm4
; AVX512F-NEXT: vpsrlq $1, %ymm1, %ymm1
-; AVX512F-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
-; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512F-NEXT: vpsllq %xmm2, %ymm0, %ymm0
+; AVX512F-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
+; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX512F-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v4i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
-; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX512VL-NEXT: vpbroadcastq %xmm2, %ymm2
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
+; AVX512VL-NEXT: vpandn %ymm3, %ymm2, %ymm4
; AVX512VL-NEXT: vpsrlq $1, %ymm1, %ymm1
-; AVX512VL-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
-; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VL-NEXT: vpsllq %xmm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
+; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX512VL-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v4i64:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX512BW-NEXT: vpbroadcastq %xmm2, %ymm2
+; AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
+; AVX512BW-NEXT: vpandn %ymm3, %ymm2, %ymm4
; AVX512BW-NEXT: vpsrlq $1, %ymm1, %ymm1
-; AVX512BW-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
-; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT: vpsllq %xmm2, %ymm0, %ymm0
+; AVX512BW-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
+; AVX512BW-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX512BW-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
@@ -831,12 +835,13 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v4i64:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
-; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX512VLBW-NEXT: vpbroadcastq %xmm2, %ymm2
+; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
+; AVX512VLBW-NEXT: vpandn %ymm3, %ymm2, %ymm4
; AVX512VLBW-NEXT: vpsrlq $1, %ymm1, %ymm1
-; AVX512VLBW-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
-; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VLBW-NEXT: vpsllq %xmm2, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX512VLBW-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
@@ -866,12 +871,13 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %
;
; XOPAVX2-LABEL: splatvar_funnnel_v4i64:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; XOPAVX2-NEXT: vpbroadcastq %xmm2, %ymm2
+; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
+; XOPAVX2-NEXT: vpandn %ymm3, %ymm2, %ymm4
; XOPAVX2-NEXT: vpsrlq $1, %ymm1, %ymm1
-; XOPAVX2-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
-; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
-; XOPAVX2-NEXT: vpsllq %xmm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
+; XOPAVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
+; XOPAVX2-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
%splat = shufflevector <4 x i64> %amt, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -1049,12 +1055,14 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX512BW-LABEL: splatvar_funnnel_v16i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
-; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm2, %ymm2
; AVX512BW-NEXT: vpsrlw $1, %ymm1, %ymm1
-; AVX512BW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
-; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm0
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpandn %ymm3, %ymm2, %ymm4
+; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
+; AVX512BW-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
@@ -1069,12 +1077,13 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
-; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %ymm2
+; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VLBW-NEXT: vpandn %ymm3, %ymm2, %ymm4
; AVX512VLBW-NEXT: vpsrlw $1, %ymm1, %ymm1
-; AVX512VLBW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
-; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpsrlvw %ymm4, %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX512VLBW-NEXT: vpsllvw %ymm2, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshl-512.ll b/llvm/test/CodeGen/X86/vector-fshl-512.ll
index e23855361e57a2..fe8af191d0ff40 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-512.ll
@@ -426,34 +426,37 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt)
define <8 x i64> @splatvar_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v8i64:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX512F-NEXT: vpbroadcastq %xmm2, %zmm2
+; AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
+; AVX512F-NEXT: vpandnq %zmm3, %zmm2, %zmm4
; AVX512F-NEXT: vpsrlq $1, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlq %xmm4, %zmm1, %zmm1
-; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512F-NEXT: vpsllq %xmm2, %zmm0, %zmm0
+; AVX512F-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1
+; AVX512F-NEXT: vpandq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsllvq %zmm2, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v8i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
-; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX512VL-NEXT: vpbroadcastq %xmm2, %zmm2
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
+; AVX512VL-NEXT: vpandnq %zmm3, %zmm2, %zmm4
; AVX512VL-NEXT: vpsrlq $1, %zmm1, %zmm1
-; AVX512VL-NEXT: vpsrlq %xmm4, %zmm1, %zmm1
-; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VL-NEXT: vpsllq %xmm2, %zmm0, %zmm0
+; AVX512VL-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1
+; AVX512VL-NEXT: vpandq %zmm3, %zmm2, %zmm2
+; AVX512VL-NEXT: vpsllvq %zmm2, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v8i64:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX512BW-NEXT: vpbroadcastq %xmm2, %zmm2
+; AVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
+; AVX512BW-NEXT: vpandnq %zmm3, %zmm2, %zmm4
; AVX512BW-NEXT: vpsrlq $1, %zmm1, %zmm1
-; AVX512BW-NEXT: vpsrlq %xmm4, %zmm1, %zmm1
-; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT: vpsllq %xmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1
+; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm2
+; AVX512BW-NEXT: vpsllvq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
@@ -465,12 +468,13 @@ define <8 x i64> @splatvar_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i64:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
-; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX512VLBW-NEXT: vpbroadcastq %xmm2, %zmm2
+; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
+; AVX512VLBW-NEXT: vpandnq %zmm3, %zmm2, %zmm4
; AVX512VLBW-NEXT: vpsrlq $1, %zmm1, %zmm1
-; AVX512VLBW-NEXT: vpsrlq %xmm4, %zmm1, %zmm1
-; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VLBW-NEXT: vpsllq %xmm2, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1
+; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm2
+; AVX512VLBW-NEXT: vpsllvq %zmm2, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
@@ -580,12 +584,13 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
;
; AVX512BW-LABEL: splatvar_funnnel_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
-; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX512BW-NEXT: vpbroadcastw %xmm2, %zmm2
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpandnq %zmm3, %zmm2, %zmm4
; AVX512BW-NEXT: vpsrlw $1, %zmm1, %zmm1
-; AVX512BW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
-; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
+; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm2
+; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
@@ -597,12 +602,13 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
-; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %zmm2
+; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VLBW-NEXT: vpandnq %zmm3, %zmm2, %zmm4
; AVX512VLBW-NEXT: vpsrlw $1, %zmm1, %zmm1
-; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
-; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
+; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm2
+; AVX512VLBW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
index 6b5ba7042c5c51..73a81614f9a937 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
@@ -750,17 +750,30 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind
; SSE41-NEXT: por %xmm4, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: splatvar_funnnel_v2i64:
-; AVX: # %bb.0:
-; AVX-NEXT: vpmovsxbq {{.*#+}} xmm2 = [63,63]
-; AVX-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX-NEXT: vpsllq %xmm3, %xmm0, %xmm3
-; AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX-NEXT: vpsubq %xmm1, %xmm4, %xmm1
-; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpor %xmm0, %xmm3, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_funnnel_v2i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm2 = [63,63]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vpsllq %xmm3, %xmm0, %xmm3
+; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: vpsubq %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatvar_funnnel_v2i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm2 = [63,63]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX2-NEXT: vpsllvq %xmm3, %xmm0, %xmm3
+; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX2-NEXT: vpsubq %xmm1, %xmm4, %xmm1
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpor %xmm0, %xmm3, %xmm0
+; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v2i64:
; AVX512F: # %bb.0:
@@ -990,23 +1003,27 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind
;
; AVX512BW-LABEL: splatvar_funnnel_v8i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm2 = [15,0]
-; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
-; AVX512BW-NEXT: vpsrlw $1, %xmm0, %xmm4
-; AVX512BW-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
-; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpor %xmm3, %xmm0, %xmm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpsrlw $1, %xmm0, %xmm2
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpandn %xmm3, %xmm1, %xmm4
+; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm2, %zmm2
+; AVX512BW-NEXT: vpand %xmm3, %xmm1, %xmm1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpor %xmm2, %xmm0, %xmm0
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpmovsxbq {{.*#+}} xmm2 = [15,0]
+; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15]
; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VLBW-NEXT: vpsrlw $1, %xmm0, %xmm4
-; AVX512VLBW-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
+; AVX512VLBW-NEXT: vpsrlvw %xmm3, %xmm4, %xmm3
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpsllvw %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
index 731a9f4e403730..8c3e27aaba1fb1 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
@@ -587,13 +587,14 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind
;
; AVX2-LABEL: splatvar_funnnel_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm2 = [63,63]
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX2-NEXT: vpsllq %xmm3, %ymm0, %ymm3
+; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [63,63,63,63]
+; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm3
+; AVX2-NEXT: vpsllvq %ymm3, %ymm0, %ymm3
; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX2-NEXT: vpsubq %xmm1, %xmm4, %xmm1
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsubq %ymm1, %ymm4, %ymm1
+; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm0, %ymm3, %ymm0
; AVX2-NEXT: retq
;
@@ -806,23 +807,26 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounw
;
; AVX512BW-LABEL: splatvar_funnnel_v16i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
-; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
-; AVX512BW-NEXT: vpsrlw $1, %ymm0, %ymm4
-; AVX512BW-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
-; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpor %ymm3, %ymm0, %ymm0
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512BW-NEXT: vpsrlw $1, %ymm0, %ymm2
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpandn %ymm3, %ymm1, %ymm4
+; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm2, %zmm2
+; AVX512BW-NEXT: vpand %ymm3, %ymm1, %ymm1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpor %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
-; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VLBW-NEXT: vpandn %ymm2, %ymm1, %ymm3
; AVX512VLBW-NEXT: vpsrlw $1, %ymm0, %ymm4
-; AVX512VLBW-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
-; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpsrlvw %ymm3, %ymm4, %ymm3
+; AVX512VLBW-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
index edfa56a70d59e9..85a43ea02034d5 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
@@ -334,23 +334,25 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw
;
; AVX512BW-LABEL: splatvar_funnnel_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
-; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %zmm1
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpandnq %zmm2, %zmm1, %zmm3
; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm4
-; AVX512BW-NEXT: vpsrlw %xmm3, %zmm4, %zmm3
-; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlvw %zmm3, %zmm4, %zmm3
+; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
-; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %zmm1
+; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VLBW-NEXT: vpandnq %zmm2, %zmm1, %zmm3
; AVX512VLBW-NEXT: vpsrlw $1, %zmm0, %zmm4
-; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm4, %zmm3
-; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpsrlvw %zmm3, %zmm4, %zmm3
+; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm1
+; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll
index 638a3cdaa2c1d2..56333a69c3e694 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll
@@ -1113,47 +1113,62 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: splatvar_funnnel_v2i64:
-; AVX: # %bb.0:
-; AVX-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; AVX-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
-; AVX-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX-NEXT: vpaddq %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vpsllq %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_funnnel_v2i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
+; AVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatvar_funnnel_v2i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq %xmm2, %xmm2
+; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
+; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
+; AVX2-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
+; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpaddq %xmm0, %xmm0, %xmm0
+; AVX2-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v2i64:
; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX512F-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512F-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
+; AVX512F-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512F-NEXT: vpaddq %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT: vpsllq %xmm2, %xmm0, %xmm0
+; AVX512F-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v2i64:
; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VL-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
+; AVX512VL-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512VL-NEXT: vpaddq %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT: vpsllq %xmm2, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v2i64:
; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512BW-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
+; AVX512BW-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT: vpaddq %xmm0, %xmm0, %xmm0
-; AVX512BW-NEXT: vpsllq %xmm2, %xmm0, %xmm0
+; AVX512BW-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
@@ -1169,12 +1184,13 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v2i64:
; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VLBW-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpaddq %xmm0, %xmm0, %xmm0
-; AVX512VLBW-NEXT: vpsllq %xmm2, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
@@ -1185,16 +1201,28 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %
; AVX512VLVBMI2-NEXT: vmovdqa %xmm1, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
-; XOP-LABEL: splatvar_funnnel_v2i64:
-; XOP: # %bb.0:
-; XOP-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; XOP-NEXT: vpand %xmm3, %xmm2, %xmm4
-; XOP-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
-; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; XOP-NEXT: vpaddq %xmm0, %xmm0, %xmm0
-; XOP-NEXT: vpsllq %xmm2, %xmm0, %xmm0
-; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
-; XOP-NEXT: retq
+; XOPAVX1-LABEL: splatvar_funnnel_v2i64:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
+; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
+; XOPAVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
+; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0
+; XOPAVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0
+; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_funnnel_v2i64:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastq %xmm2, %xmm2
+; XOPAVX2-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
+; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
+; XOPAVX2-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; XOPAVX2-NEXT: vpaddq %xmm0, %xmm0, %xmm0
+; XOPAVX2-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
+; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
;
; X86-SSE2-LABEL: splatvar_funnnel_v2i64:
; X86-SSE2: # %bb.0:
@@ -1380,13 +1408,16 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512BW-LABEL: splatvar_funnnel_v8i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm3 = [15,0]
+; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512BW-NEXT: vpbroadcastw %xmm2, %xmm2
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512BW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
+; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT: vpaddw %xmm0, %xmm0, %xmm0
-; AVX512BW-NEXT: vpsllw %xmm2, %xmm0, %xmm0
+; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v8i16:
@@ -1401,12 +1432,13 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpmovsxbq {{.*#+}} xmm3 = [15,0]
+; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %xmm2
+; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VLBW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpsrlvw %xmm4, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpaddw %xmm0, %xmm0, %xmm0
-; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpsllvw %xmm2, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll
index 3fabf720da71c3..20143d1f9fe66d 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll
@@ -808,45 +808,49 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %
;
; AVX2-LABEL: splatvar_funnnel_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX2-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
-; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpbroadcastq %xmm2, %ymm2
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
+; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm4
+; AVX2-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
+; AVX2-NEXT: vpandn %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpaddq %ymm0, %ymm0, %ymm0
-; AVX2-NEXT: vpsllq %xmm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v4i64:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512F-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
-; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX512F-NEXT: vpbroadcastq %xmm2, %ymm2
+; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
+; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm4
+; AVX512F-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
+; AVX512F-NEXT: vpandn %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpaddq %ymm0, %ymm0, %ymm0
-; AVX512F-NEXT: vpsllq %xmm2, %ymm0, %ymm0
+; AVX512F-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v4i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
-; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VL-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
-; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX512VL-NEXT: vpbroadcastq %xmm2, %ymm2
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
+; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm4
+; AVX512VL-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
+; AVX512VL-NEXT: vpandn %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpaddq %ymm0, %ymm0, %ymm0
-; AVX512VL-NEXT: vpsllq %xmm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v4i64:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512BW-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
-; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX512BW-NEXT: vpbroadcastq %xmm2, %ymm2
+; AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
+; AVX512BW-NEXT: vpand %ymm3, %ymm2, %ymm4
+; AVX512BW-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
+; AVX512BW-NEXT: vpandn %ymm3, %ymm2, %ymm2
; AVX512BW-NEXT: vpaddq %ymm0, %ymm0, %ymm0
-; AVX512BW-NEXT: vpsllq %xmm2, %ymm0, %ymm0
+; AVX512BW-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
@@ -861,12 +865,13 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v4i64:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
-; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VLBW-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
-; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX512VLBW-NEXT: vpbroadcastq %xmm2, %ymm2
+; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
+; AVX512VLBW-NEXT: vpand %ymm3, %ymm2, %ymm4
+; AVX512VLBW-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpandn %ymm3, %ymm2, %ymm2
; AVX512VLBW-NEXT: vpaddq %ymm0, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpsllq %xmm2, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
@@ -897,12 +902,13 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %
;
; XOPAVX2-LABEL: splatvar_funnnel_v4i64:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
-; XOPAVX2-NEXT: vpsrlq %xmm4, %ymm1, %ymm1
-; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; XOPAVX2-NEXT: vpbroadcastq %xmm2, %ymm2
+; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
+; XOPAVX2-NEXT: vpand %ymm3, %ymm2, %ymm4
+; XOPAVX2-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
+; XOPAVX2-NEXT: vpandn %ymm3, %ymm2, %ymm2
; XOPAVX2-NEXT: vpaddq %ymm0, %ymm0, %ymm0
-; XOPAVX2-NEXT: vpsllq %xmm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
%splat = shufflevector <4 x i64> %amt, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -1081,12 +1087,14 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX512BW-LABEL: splatvar_funnnel_v16i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
-; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512BW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
-; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT: vpbroadcastw %xmm2, %ymm2
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpand %ymm3, %ymm2, %ymm4
+; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddw %ymm0, %ymm0, %ymm0
-; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm0
+; AVX512BW-NEXT: vpandn %ymm3, %ymm2, %ymm2
+; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
@@ -1101,12 +1109,13 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
-; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VLBW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
-; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %ymm2
+; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VLBW-NEXT: vpand %ymm3, %ymm2, %ymm4
+; AVX512VLBW-NEXT: vpsrlvw %ymm4, %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpandn %ymm3, %ymm2, %ymm2
; AVX512VLBW-NEXT: vpaddw %ymm0, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpsllvw %ymm2, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshr-512.ll b/llvm/test/CodeGen/X86/vector-fshr-512.ll
index 91dd83050e17e6..259e5a5f471154 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-512.ll
@@ -424,34 +424,37 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt)
define <8 x i64> @splatvar_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v8i64:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512F-NEXT: vpsrlq %xmm4, %zmm1, %zmm1
-; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX512F-NEXT: vpbroadcastq %xmm2, %zmm2
+; AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
+; AVX512F-NEXT: vpandq %zmm3, %zmm2, %zmm4
+; AVX512F-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1
+; AVX512F-NEXT: vpandnq %zmm3, %zmm2, %zmm2
; AVX512F-NEXT: vpaddq %zmm0, %zmm0, %zmm0
-; AVX512F-NEXT: vpsllq %xmm2, %zmm0, %zmm0
+; AVX512F-NEXT: vpsllvq %zmm2, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v8i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
-; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VL-NEXT: vpsrlq %xmm4, %zmm1, %zmm1
-; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX512VL-NEXT: vpbroadcastq %xmm2, %zmm2
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
+; AVX512VL-NEXT: vpandq %zmm3, %zmm2, %zmm4
+; AVX512VL-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1
+; AVX512VL-NEXT: vpandnq %zmm3, %zmm2, %zmm2
; AVX512VL-NEXT: vpaddq %zmm0, %zmm0, %zmm0
-; AVX512VL-NEXT: vpsllq %xmm2, %zmm0, %zmm0
+; AVX512VL-NEXT: vpsllvq %zmm2, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v8i64:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512BW-NEXT: vpsrlq %xmm4, %zmm1, %zmm1
-; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX512BW-NEXT: vpbroadcastq %xmm2, %zmm2
+; AVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
+; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm4
+; AVX512BW-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1
+; AVX512BW-NEXT: vpandnq %zmm3, %zmm2, %zmm2
; AVX512BW-NEXT: vpaddq %zmm0, %zmm0, %zmm0
-; AVX512BW-NEXT: vpsllq %xmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsllvq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
@@ -464,12 +467,13 @@ define <8 x i64> @splatvar_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y, <8 x i64> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i64:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
-; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VLBW-NEXT: vpsrlq %xmm4, %zmm1, %zmm1
-; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX512VLBW-NEXT: vpbroadcastq %xmm2, %zmm2
+; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} zmm3 = [63,63,63,63,63,63,63,63]
+; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm4
+; AVX512VLBW-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1
+; AVX512VLBW-NEXT: vpandnq %zmm3, %zmm2, %zmm2
; AVX512VLBW-NEXT: vpaddq %zmm0, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpsllq %xmm2, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpsllvq %zmm2, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
@@ -582,12 +586,13 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
;
; AVX512BW-LABEL: splatvar_funnnel_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
-; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512BW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
-; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX512BW-NEXT: vpbroadcastw %xmm2, %zmm2
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm4
+; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
+; AVX512BW-NEXT: vpandnq %zmm3, %zmm2, %zmm2
; AVX512BW-NEXT: vpaddw %zmm0, %zmm0, %zmm0
-; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
@@ -600,12 +605,13 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
-; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
-; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %zmm2
+; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm4
+; AVX512VLBW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
+; AVX512VLBW-NEXT: vpandnq %zmm3, %zmm2, %zmm2
; AVX512VLBW-NEXT: vpaddw %zmm0, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
index 01578d399b774f..50603fa03e9b23 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
@@ -777,17 +777,30 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind
; SSE41-NEXT: por %xmm4, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: splatvar_funnnel_v2i64:
-; AVX: # %bb.0:
-; AVX-NEXT: vpmovsxbq {{.*#+}} xmm2 = [63,63]
-; AVX-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX-NEXT: vpsrlq %xmm3, %xmm0, %xmm3
-; AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX-NEXT: vpsubq %xmm1, %xmm4, %xmm1
-; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpor %xmm0, %xmm3, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_funnnel_v2i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm2 = [63,63]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vpsrlq %xmm3, %xmm0, %xmm3
+; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: vpsubq %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatvar_funnnel_v2i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm2 = [63,63]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX2-NEXT: vpsrlvq %xmm3, %xmm0, %xmm3
+; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX2-NEXT: vpsubq %xmm1, %xmm4, %xmm1
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpor %xmm0, %xmm3, %xmm0
+; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v2i64:
; AVX512F: # %bb.0:
@@ -1027,23 +1040,27 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind
;
; AVX512BW-LABEL: splatvar_funnnel_v8i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm2 = [15,0]
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512BW-NEXT: vpsrlw %xmm3, %xmm0, %xmm3
+; AVX512BW-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3
; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpaddw %xmm0, %xmm0, %xmm0
-; AVX512BW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpor %xmm3, %xmm0, %xmm0
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpmovsxbq {{.*#+}} xmm2 = [15,0]
+; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15]
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VLBW-NEXT: vpsrlw %xmm3, %xmm0, %xmm3
+; AVX512VLBW-NEXT: vpsrlvw %xmm3, %xmm0, %xmm3
; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpaddw %xmm0, %xmm0, %xmm0
-; AVX512VLBW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpsllvw %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
index 0aa91b74e12cab..36872bc144864f 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
@@ -618,13 +618,14 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind
;
; AVX2-LABEL: splatvar_funnnel_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm2 = [63,63]
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX2-NEXT: vpsrlq %xmm3, %ymm0, %ymm3
+; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [63,63,63,63]
+; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm3
+; AVX2-NEXT: vpsrlvq %ymm3, %ymm0, %ymm3
; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX2-NEXT: vpsubq %xmm1, %xmm4, %xmm1
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsubq %ymm1, %ymm4, %ymm1
+; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm0, %ymm3, %ymm0
; AVX2-NEXT: retq
;
@@ -845,23 +846,26 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounw
;
; AVX512BW-LABEL: splatvar_funnnel_v16i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
-; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512BW-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
-; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm1
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpand %ymm2, %ymm1, %ymm3
+; AVX512BW-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3
; AVX512BW-NEXT: vpaddw %ymm0, %ymm0, %ymm0
-; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpandn %ymm2, %ymm1, %ymm1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
-; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VLBW-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
-; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VLBW-NEXT: vpand %ymm2, %ymm1, %ymm3
+; AVX512VLBW-NEXT: vpsrlvw %ymm3, %ymm0, %ymm3
+; AVX512VLBW-NEXT: vpandn %ymm2, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpaddw %ymm0, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
index 4364c047dfdebf..d262a866bf6f43 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
@@ -332,23 +332,25 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw
;
; AVX512BW-LABEL: splatvar_funnnel_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
-; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512BW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3
-; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm1
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %zmm1
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm3
+; AVX512BW-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3
+; AVX512BW-NEXT: vpandnq %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddw %zmm0, %zmm0, %zmm0
-; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
-; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3
-; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %zmm1
+; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm3
+; AVX512VLBW-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3
+; AVX512VLBW-NEXT: vpandnq %zmm2, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpaddw %zmm0, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll
index 2e21f8d0aa32a1..20797cd22d5737 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-128.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll
@@ -680,14 +680,24 @@ define <2 x i64> @splatvar_rotate_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE41-NEXT: por %xmm3, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: splatvar_rotate_v2i64:
-; AVX: # %bb.0:
-; AVX-NEXT: vpmovsxbq {{.*#+}} xmm2 = [64,64]
-; AVX-NEXT: vpsubq %xmm1, %xmm2, %xmm2
-; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm1
-; AVX-NEXT: vpsrlq %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_rotate_v2i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm2 = [64,64]
+; AVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vpsrlq %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatvar_rotate_v2i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm2 = [64,64]
+; AVX2-NEXT: vpsubq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpsllvq %xmm1, %xmm0, %xmm1
+; AVX2-NEXT: vpsrlvq %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
;
; AVX512NOVLX-LABEL: splatvar_rotate_v2i64:
; AVX512NOVLX: # %bb.0:
@@ -860,23 +870,27 @@ define <8 x i16> @splatvar_rotate_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
;
; AVX512BW-LABEL: splatvar_rotate_v8i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovsxbq {{.*#+}} xmm2 = [15,0]
-; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
-; AVX512BW-NEXT: vpsrlw $1, %xmm0, %xmm4
-; AVX512BW-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
-; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpor %xmm3, %xmm0, %xmm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpsrlw $1, %xmm0, %xmm2
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpandn %xmm3, %xmm1, %xmm4
+; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm2, %zmm2
+; AVX512BW-NEXT: vpand %xmm3, %xmm1, %xmm1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpor %xmm2, %xmm0, %xmm0
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_rotate_v8i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpmovsxbq {{.*#+}} xmm2 = [15,0]
+; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15]
; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VLBW-NEXT: vpsrlw $1, %xmm0, %xmm4
-; AVX512VLBW-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
+; AVX512VLBW-NEXT: vpsrlvw %xmm3, %xmm4, %xmm3
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpsllvw %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll
index b5f0522327a448..a2a01a2777f7e4 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-256.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll
@@ -531,11 +531,12 @@ define <4 x i64> @splatvar_rotate_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; AVX2-LABEL: splatvar_rotate_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm2
-; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm3 = [64,64]
-; AVX2-NEXT: vpsubq %xmm1, %xmm3, %xmm1
-; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [64,64,64,64]
+; AVX2-NEXT: vpsubq %ymm1, %ymm2, %ymm2
+; AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm1
+; AVX2-NEXT: vpsrlvq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512NOVLX-LABEL: splatvar_rotate_v4i64:
@@ -697,23 +698,26 @@ define <16 x i16> @splatvar_rotate_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
;
; AVX512BW-LABEL: splatvar_rotate_v16i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
-; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
-; AVX512BW-NEXT: vpsrlw $1, %ymm0, %ymm4
-; AVX512BW-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
-; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpor %ymm3, %ymm0, %ymm0
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512BW-NEXT: vpsrlw $1, %ymm0, %ymm2
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpandn %ymm3, %ymm1, %ymm4
+; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm2, %zmm2
+; AVX512BW-NEXT: vpand %ymm3, %ymm1, %ymm1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpor %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_rotate_v16i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
-; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VLBW-NEXT: vpandn %ymm2, %ymm1, %ymm3
; AVX512VLBW-NEXT: vpsrlw $1, %ymm0, %ymm4
-; AVX512VLBW-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
-; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpsrlvw %ymm3, %ymm4, %ymm3
+; AVX512VLBW-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-rotate-512.ll b/llvm/test/CodeGen/X86/vector-rotate-512.ll
index 29afbf4c62ef5a..c8a5db70af8bd4 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-512.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-512.ll
@@ -347,23 +347,25 @@ define <32 x i16> @splatvar_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind
;
; AVX512BW-LABEL: splatvar_rotate_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
-; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %zmm1
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpandnq %zmm2, %zmm1, %zmm3
; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm4
-; AVX512BW-NEXT: vpsrlw %xmm3, %zmm4, %zmm3
-; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlvw %zmm3, %zmm4, %zmm3
+; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_rotate_v32i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
-; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %zmm1
+; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VLBW-NEXT: vpandnq %zmm2, %zmm1, %zmm3
; AVX512VLBW-NEXT: vpsrlw $1, %zmm0, %zmm4
-; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm4, %zmm3
-; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpsrlvw %zmm3, %zmm4, %zmm3
+; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm1
+; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
index 53b6aca3e9fcba..006b034ec1674a 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
@@ -645,9 +645,10 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
;
; AVX2-LABEL: splatvar_shift_v2i64:
; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
-; AVX2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsubq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: retq
@@ -671,14 +672,16 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; AVX512-LABEL: splatvar_shift_v2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512-NEXT: vpsraq %xmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v2i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsraq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX512VL-NEXT: vpsravq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v2i64:
@@ -708,28 +711,40 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE41-NEXT: psrad %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: splatvar_shift_v4i32:
-; AVX: # %bb.0:
-; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX-NEXT: vpsrad %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_shift_v4i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
;
-; XOP-LABEL: splatvar_shift_v4i32:
-; XOP: # %bb.0:
-; XOP-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; XOP-NEXT: vpsrad %xmm1, %xmm0, %xmm0
-; XOP-NEXT: retq
+; AVX2-LABEL: splatvar_shift_v4i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-NEXT: vpsravd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_shift_v4i32:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_shift_v4i32:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsravd %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v4i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512-NEXT: vpsravd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v4i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512VL-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512VL-NEXT: vpsravd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v4i32:
@@ -769,17 +784,32 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; XOP-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512-LABEL: splatvar_shift_v8i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsraw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_shift_v8i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQ-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_shift_v8i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsraw %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_shift_v8i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_shift_v8i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQVL-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_shift_v8i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpsravw %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v8i16:
; X86-SSE: # %bb.0:
@@ -872,18 +902,20 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v16i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQ-NEXT: vpsrad %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQ-NEXT: vpsravd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v16i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BW-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -891,18 +923,20 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v16i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQVL-NEXT: vpsrad %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQVL-NEXT: vpsravd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v16i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BWVL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BWVL-NEXT: vpsravw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -957,10 +991,11 @@ define <2 x i64> @splatvar_modulo_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwi
;
; AVX2-LABEL: splatvar_modulo_shift_v2i64:
; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
-; AVX2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsubq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: retq
@@ -986,16 +1021,18 @@ define <2 x i64> @splatvar_modulo_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwi
; AVX512-LABEL: splatvar_modulo_shift_v2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsraq %xmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v2i64:
; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
-; AVX512VL-NEXT: vpsraq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsravq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_modulo_shift_v2i64:
@@ -1020,28 +1057,47 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
; SSE-NEXT: psrad %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: splatvar_modulo_shift_v4i32:
-; AVX: # %bb.0:
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-NEXT: vpsrad %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_modulo_shift_v4i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
;
-; XOP-LABEL: splatvar_modulo_shift_v4i32:
-; XOP: # %bb.0:
-; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOP-NEXT: vpsrad %xmm1, %xmm0, %xmm0
-; XOP-NEXT: retq
+; AVX2-LABEL: splatvar_modulo_shift_v4i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpsravd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_modulo_shift_v4i32:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_modulo_shift_v4i32:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsravd %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v4i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpsravd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v4i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512VL-NEXT: vpsravd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_modulo_shift_v4i32:
@@ -1074,17 +1130,34 @@ define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwi
; XOP-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512-LABEL: splatvar_modulo_shift_v8i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsraw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQ-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_modulo_shift_v8i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpsraw %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpsravw %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_modulo_shift_v8i16:
; X86-SSE: # %bb.0:
@@ -1178,20 +1251,22 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
;
; AVX512DQ-LABEL: splatvar_modulo_shift_v16i8:
; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512DQ-NEXT: vpsrad %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQ-NEXT: vpsravd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_modulo_shift_v16i8:
; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -1199,20 +1274,22 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
;
; AVX512DQVL-LABEL: splatvar_modulo_shift_v16i8:
; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512DQVL-NEXT: vpsrad %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQVL-NEXT: vpsravd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_modulo_shift_v16i8:
; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BWVL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BWVL-NEXT: vpsravw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -1815,9 +1892,10 @@ define <2 x i64> @PR52719(<2 x i64> %a0, i32 %a1) {
; AVX2-LABEL: PR52719:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm1
+; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
-; AVX2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsubq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: retq
@@ -1844,15 +1922,18 @@ define <2 x i64> @PR52719(<2 x i64> %a0, i32 %a1) {
; AVX512: # %bb.0:
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512-NEXT: vmovd %edi, %xmm1
-; AVX512-NEXT: vpsraq %xmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: PR52719:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovd %edi, %xmm1
-; AVX512VL-NEXT: vpsraq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpbroadcastd %edi, %xmm1
+; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512VL-NEXT: vpsravq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: PR52719:
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
index 9a483c345f92cb..186e3dc12c5137 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -670,9 +670,10 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; AVX2-LABEL: splatvar_shift_v4i64:
; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
-; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
+; AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
@@ -690,9 +691,10 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; XOPAVX2-LABEL: splatvar_shift_v4i64:
; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastq %xmm1, %ymm1
; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
-; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
+; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; XOPAVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
@@ -700,13 +702,15 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX512-LABEL: splatvar_shift_v4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512-NEXT: vpsraq %xmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v4i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsraq %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX512VL-NEXT: vpsravq %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_shift_v4i64:
@@ -726,9 +730,10 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; X86-AVX2-LABEL: splatvar_shift_v4i64:
; X86-AVX2: # %bb.0:
+; X86-AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
-; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
-; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
+; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
@@ -749,8 +754,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; AVX2-LABEL: splatvar_shift_v8i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX2-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v8i32:
@@ -764,20 +769,20 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; XOPAVX2-LABEL: splatvar_shift_v8i32:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; XOPAVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; XOPAVX2-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v8i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX512-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v8i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512VL-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX512VL-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_shift_v8i32:
@@ -791,8 +796,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; X86-AVX2-LABEL: splatvar_shift_v8i32:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; X86-AVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; X86-AVX2-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
%shift = ashr <8 x i32> %a, %splat
@@ -830,17 +835,31 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
; XOPAVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
-; AVX512-LABEL: splatvar_shift_v16i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsraw %xmm1, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_shift_v16i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQ-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_shift_v16i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_shift_v16i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_shift_v16i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQVL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_shift_v16i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512BWVL-NEXT: vpsravw %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_shift_v16i16:
; X86-AVX1: # %bb.0:
@@ -936,9 +955,10 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
;
; AVX512BW-LABEL: splatvar_shift_v32i8:
; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
;
@@ -958,9 +978,10 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
;
; AVX512BWVL-LABEL: splatvar_shift_v32i8:
; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512BWVL-NEXT: vpmovsxbw %ymm0, %zmm0
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BWVL-NEXT: vpsraw %xmm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512BWVL-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT: retq
;
@@ -1026,10 +1047,12 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
;
; AVX2-LABEL: splatvar_modulo_shift_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
-; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
+; AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
@@ -1048,10 +1071,12 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
;
; XOPAVX2-LABEL: splatvar_modulo_shift_v4i64:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpbroadcastq %xmm1, %ymm1
; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
-; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
+; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; XOPAVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
@@ -1059,15 +1084,18 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
; AVX512-LABEL: splatvar_modulo_shift_v4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsraq %xmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
-; AVX512VL-NEXT: vpsraq %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX512VL-NEXT: vpsravq %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v4i64:
@@ -1089,9 +1117,10 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
; X86-AVX2-LABEL: splatvar_modulo_shift_v4i64:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
+; X86-AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
-; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
-; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
+; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
@@ -1113,8 +1142,10 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; AVX2-LABEL: splatvar_modulo_shift_v8i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX2-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v8i32:
@@ -1128,20 +1159,25 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; XOPAVX2-LABEL: splatvar_modulo_shift_v8i32:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; XOPAVX2-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v8i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX512-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v8i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512VL-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX512VL-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v8i32:
@@ -1155,8 +1191,10 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; X86-AVX2-LABEL: splatvar_modulo_shift_v8i32:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
-; X86-AVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; X86-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; X86-AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; X86-AVX2-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
%splat = shufflevector <8 x i32> %mod, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -1195,17 +1233,33 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; XOPAVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
-; AVX512-LABEL: splatvar_modulo_shift_v16i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsraw %xmm1, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQ-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_modulo_shift_v16i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512BWVL-NEXT: vpsravw %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v16i16:
; X86-AVX1: # %bb.0:
@@ -1304,10 +1358,11 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
;
; AVX512BW-LABEL: splatvar_modulo_shift_v32i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
;
@@ -1327,10 +1382,11 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
;
; AVX512BWVL-LABEL: splatvar_modulo_shift_v32i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpmovsxbw %ymm0, %zmm0
; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BWVL-NEXT: vpsraw %xmm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX512BWVL-NEXT: vpmovsxbw %ymm0, %zmm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512BWVL-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT: retq
;
@@ -2134,9 +2190,11 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
; AVX2-LABEL: PR52719:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm1
+; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
-; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
+; AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
@@ -2159,9 +2217,11 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
; XOPAVX2-LABEL: PR52719:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vmovd %edi, %xmm1
+; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
-; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
+; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; XOPAVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
@@ -2170,14 +2230,17 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
; AVX512: # %bb.0:
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vmovd %edi, %xmm1
-; AVX512-NEXT: vpsraq %xmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: PR52719:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovd %edi, %xmm1
-; AVX512VL-NEXT: vpsraq %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpbroadcastd %edi, %xmm1
+; AVX512VL-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; AVX512VL-NEXT: vpsravq %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: PR52719:
@@ -2198,10 +2261,11 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
;
; X86-AVX2-LABEL: PR52719:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %xmm1
+; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
-; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
-; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
+; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
index a2fe36e72f6b9c..796c05d4d75ec6 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
@@ -148,7 +148,8 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_shift_v8i64:
; ALL: # %bb.0:
-; ALL-NEXT: vpsraq %xmm1, %zmm0, %zmm0
+; ALL-NEXT: vpbroadcastq %xmm1, %zmm1
+; ALL-NEXT: vpsravq %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
%shift = ashr <8 x i64> %a, %splat
@@ -158,8 +159,8 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_shift_v16i32:
; ALL: # %bb.0:
-; ALL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; ALL-NEXT: vpsrad %xmm1, %zmm0, %zmm0
+; ALL-NEXT: vpbroadcastd %xmm1, %zmm1
+; ALL-NEXT: vpsravd %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
%shift = ashr <16 x i32> %a, %splat
@@ -178,8 +179,8 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind
;
; AVX512BW-LABEL: splatvar_shift_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %zmm1
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
%shift = ashr <32 x i16> %a, %splat
@@ -233,8 +234,10 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_modulo_shift_v8i64:
; ALL: # %bb.0:
-; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; ALL-NEXT: vpsraq %xmm1, %zmm0, %zmm0
+; ALL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
+; ALL-NEXT: vpbroadcastq %xmm1, %zmm1
+; ALL-NEXT: vpsravq %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%mod = and <8 x i64> %b, <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>
%splat = shufflevector <8 x i64> %mod, <8 x i64> undef, <8 x i32> zeroinitializer
@@ -245,8 +248,10 @@ define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwi
define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_modulo_shift_v16i32:
; ALL: # %bb.0:
-; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; ALL-NEXT: vpsrad %xmm1, %zmm0, %zmm0
+; ALL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
+; ALL-NEXT: vpbroadcastd %xmm1, %zmm1
+; ALL-NEXT: vpsravd %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%mod = and <16 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
%splat = shufflevector <16 x i32> %mod, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -267,7 +272,8 @@ define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) no
; AVX512BW-LABEL: splatvar_modulo_shift_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %zmm1
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%mod = and <32 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%splat = shufflevector <32 x i16> %mod, <32 x i16> undef, <32 x i32> zeroinitializer
@@ -495,7 +501,9 @@ define <8 x i64> @PR52719(<8 x i64> %a0, i32 %a1) {
; ALL-LABEL: PR52719:
; ALL: # %bb.0:
; ALL-NEXT: vmovd %edi, %xmm1
-; ALL-NEXT: vpsraq %xmm1, %zmm0, %zmm0
+; ALL-NEXT: vpbroadcastd %xmm1, %ymm1
+; ALL-NEXT: vpmovzxdq {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero
+; ALL-NEXT: vpsravq %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%vec = insertelement <8 x i32> poison, i32 %a1, i64 0
%splat = shufflevector <8 x i32> %vec, <8 x i32> poison, <8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
index 36a6226f8f4b9c..41c900b1f6879c 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
@@ -1167,28 +1167,40 @@ define <2 x i32> @splatvar_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
; SSE41-NEXT: psrad %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: splatvar_shift_v2i32:
-; AVX: # %bb.0:
-; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX-NEXT: vpsrad %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_shift_v2i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
;
-; XOP-LABEL: splatvar_shift_v2i32:
-; XOP: # %bb.0:
-; XOP-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; XOP-NEXT: vpsrad %xmm1, %xmm0, %xmm0
-; XOP-NEXT: retq
+; AVX2-LABEL: splatvar_shift_v2i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-NEXT: vpsravd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_shift_v2i32:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_shift_v2i32:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsravd %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v2i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512-NEXT: vpsravd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v2i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512VL-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512VL-NEXT: vpsravd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v2i32:
@@ -1228,17 +1240,32 @@ define <4 x i16> @splatvar_shift_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
; XOP-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512-LABEL: splatvar_shift_v4i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsraw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_shift_v4i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQ-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_shift_v4i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsraw %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_shift_v4i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_shift_v4i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQVL-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_shift_v4i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpsravw %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v4i16:
; X86-SSE: # %bb.0:
@@ -1277,17 +1304,32 @@ define <2 x i16> @splatvar_shift_v2i16(<2 x i16> %a, <2 x i16> %b) nounwind {
; XOP-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512-LABEL: splatvar_shift_v2i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsraw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_shift_v2i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQ-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_shift_v2i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsraw %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_shift_v2i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_shift_v2i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQVL-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_shift_v2i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpsravw %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v2i16:
; X86-SSE: # %bb.0:
@@ -1381,18 +1423,20 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v8i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQ-NEXT: vpsrad %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQ-NEXT: vpsravd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v8i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BW-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -1400,18 +1444,20 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v8i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQVL-NEXT: vpsrad %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQVL-NEXT: vpsravd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v8i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BWVL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BWVL-NEXT: vpsravw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -1519,18 +1565,20 @@ define <4 x i8> @splatvar_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v4i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQ-NEXT: vpsrad %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQ-NEXT: vpsravd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v4i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BW-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -1538,18 +1586,20 @@ define <4 x i8> @splatvar_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v4i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQVL-NEXT: vpsrad %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQVL-NEXT: vpsravd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v4i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BWVL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BWVL-NEXT: vpsravw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -1648,18 +1698,20 @@ define <2 x i8> @splatvar_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v2i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQ-NEXT: vpsrad %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQ-NEXT: vpsravd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v2i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BW-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -1667,18 +1719,20 @@ define <2 x i8> @splatvar_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v2i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQVL-NEXT: vpsrad %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQVL-NEXT: vpsravd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v2i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BWVL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BWVL-NEXT: vpsravw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
index ca8343cd4812cd..a8c48d3f658c9c 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
@@ -520,24 +520,38 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE-NEXT: psrlq %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: splatvar_shift_v2i64:
-; AVX: # %bb.0:
-; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_shift_v2i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
;
-; XOP-LABEL: splatvar_shift_v2i64:
-; XOP: # %bb.0:
-; XOP-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
-; XOP-NEXT: retq
+; AVX2-LABEL: splatvar_shift_v2i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_shift_v2i64:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_shift_v2i64:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastq %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v2i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX512-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v2i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX512VL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v2i64:
@@ -563,28 +577,40 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE41-NEXT: psrld %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: splatvar_shift_v4i32:
-; AVX: # %bb.0:
-; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX-NEXT: vpsrld %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_shift_v4i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
;
-; XOP-LABEL: splatvar_shift_v4i32:
-; XOP: # %bb.0:
-; XOP-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; XOP-NEXT: vpsrld %xmm1, %xmm0, %xmm0
-; XOP-NEXT: retq
+; AVX2-LABEL: splatvar_shift_v4i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_shift_v4i32:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_shift_v4i32:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v4i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v4i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512VL-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512VL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v4i32:
@@ -624,17 +650,32 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; XOP-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512-LABEL: splatvar_shift_v8i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_shift_v8i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQ-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_shift_v8i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_shift_v8i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_shift_v8i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQVL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_shift_v8i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v8i16:
; X86-SSE: # %bb.0:
@@ -711,18 +752,20 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v16i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQ-NEXT: vpsrld %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQ-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v16i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -730,18 +773,20 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v16i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpsrld %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQVL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v16i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BWVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BWVL-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -775,28 +820,44 @@ define <2 x i64> @splatvar_modulo_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwi
; SSE-NEXT: psrlq %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: splatvar_modulo_shift_v2i64:
-; AVX: # %bb.0:
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_modulo_shift_v2i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
;
-; XOP-LABEL: splatvar_modulo_shift_v2i64:
-; XOP: # %bb.0:
-; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOP-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
-; XOP-NEXT: retq
+; AVX2-LABEL: splatvar_modulo_shift_v2i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_modulo_shift_v2i64:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_modulo_shift_v2i64:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastq %xmm1, %xmm1
+; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v2i64:
; AVX512: # %bb.0:
+; AVX512-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v2i64:
; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
-; AVX512VL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_modulo_shift_v2i64:
@@ -817,28 +878,47 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
; SSE-NEXT: psrld %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: splatvar_modulo_shift_v4i32:
-; AVX: # %bb.0:
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-NEXT: vpsrld %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_modulo_shift_v4i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
;
-; XOP-LABEL: splatvar_modulo_shift_v4i32:
-; XOP: # %bb.0:
-; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOP-NEXT: vpsrld %xmm1, %xmm0, %xmm0
-; XOP-NEXT: retq
+; AVX2-LABEL: splatvar_modulo_shift_v4i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_modulo_shift_v4i32:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_modulo_shift_v4i32:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v4i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v4i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512VL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_modulo_shift_v4i32:
@@ -871,17 +951,34 @@ define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwi
; XOP-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512-LABEL: splatvar_modulo_shift_v8i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQ-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_modulo_shift_v8i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_modulo_shift_v8i16:
; X86-SSE: # %bb.0:
@@ -959,20 +1056,22 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
;
; AVX512DQ-LABEL: splatvar_modulo_shift_v16i8:
; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512DQ-NEXT: vpsrld %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQ-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_modulo_shift_v16i8:
; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -980,20 +1079,22 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
;
; AVX512DQVL-LABEL: splatvar_modulo_shift_v16i8:
; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512DQVL-NEXT: vpsrld %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQVL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_modulo_shift_v16i8:
; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BWVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BWVL-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
index e65f78e49dc8d2..6cc1261defbe82 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
@@ -534,7 +534,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; AVX2-LABEL: splatvar_shift_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v4i64:
@@ -547,17 +548,20 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; XOPAVX2-LABEL: splatvar_shift_v4i64:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v4i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX512-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v4i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX512VL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_shift_v4i64:
@@ -570,7 +574,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; X86-AVX2-LABEL: splatvar_shift_v4i64:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
%shift = lshr <4 x i64> %a, %splat
@@ -589,8 +594,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; AVX2-LABEL: splatvar_shift_v8i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v8i32:
@@ -604,20 +609,20 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; XOPAVX2-LABEL: splatvar_shift_v8i32:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; XOPAVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; XOPAVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v8i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX512-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v8i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512VL-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX512VL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_shift_v8i32:
@@ -631,8 +636,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; X86-AVX2-LABEL: splatvar_shift_v8i32:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; X86-AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; X86-AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
%shift = lshr <8 x i32> %a, %splat
@@ -670,17 +675,31 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
; XOPAVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
-; AVX512-LABEL: splatvar_shift_v16i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_shift_v16i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_shift_v16i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_shift_v16i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_shift_v16i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_shift_v16i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512BWVL-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_shift_v16i16:
; X86-AVX1: # %bb.0:
@@ -762,9 +781,10 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
;
; AVX512BW-LABEL: splatvar_shift_v32i8:
; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
;
@@ -781,9 +801,10 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
;
; AVX512BWVL-LABEL: splatvar_shift_v32i8:
; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BWVL-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512BWVL-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT: retq
;
@@ -832,8 +853,10 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
;
; AVX2-LABEL: splatvar_modulo_shift_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v4i64:
@@ -847,20 +870,25 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
;
; XOPAVX2-LABEL: splatvar_modulo_shift_v4i64:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v4i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX512-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
-; AVX512VL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX512VL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v4i64:
@@ -875,7 +903,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
; X86-AVX2-LABEL: splatvar_modulo_shift_v4i64:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
-; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <4 x i64> %b, <i64 63, i64 63, i64 63, i64 63>
%splat = shufflevector <4 x i64> %mod, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -895,8 +924,10 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; AVX2-LABEL: splatvar_modulo_shift_v8i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v8i32:
@@ -910,20 +941,25 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; XOPAVX2-LABEL: splatvar_modulo_shift_v8i32:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; XOPAVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v8i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX512-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v8i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512VL-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX512VL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v8i32:
@@ -937,8 +973,10 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; X86-AVX2-LABEL: splatvar_modulo_shift_v8i32:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
-; X86-AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; X86-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; X86-AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; X86-AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
%splat = shufflevector <8 x i32> %mod, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -977,17 +1015,33 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; XOPAVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
-; AVX512-LABEL: splatvar_modulo_shift_v16i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_modulo_shift_v16i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512BWVL-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v16i16:
; X86-AVX1: # %bb.0:
@@ -1072,10 +1126,11 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
;
; AVX512BW-LABEL: splatvar_modulo_shift_v32i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
;
@@ -1092,10 +1147,11 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
;
; AVX512BWVL-LABEL: splatvar_modulo_shift_v32i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BWVL-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512BWVL-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
index efd73b4ca132bb..1fbd0ea9b9f157 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
@@ -112,7 +112,8 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_shift_v8i64:
; ALL: # %bb.0:
-; ALL-NEXT: vpsrlq %xmm1, %zmm0, %zmm0
+; ALL-NEXT: vpbroadcastq %xmm1, %zmm1
+; ALL-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
%shift = lshr <8 x i64> %a, %splat
@@ -122,8 +123,8 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_shift_v16i32:
; ALL: # %bb.0:
-; ALL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; ALL-NEXT: vpsrld %xmm1, %zmm0, %zmm0
+; ALL-NEXT: vpbroadcastd %xmm1, %zmm1
+; ALL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
%shift = lshr <16 x i32> %a, %splat
@@ -142,8 +143,8 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind
;
; AVX512BW-LABEL: splatvar_shift_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %zmm1
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
%shift = lshr <32 x i16> %a, %splat
@@ -188,8 +189,10 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_modulo_shift_v8i64:
; ALL: # %bb.0:
-; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; ALL-NEXT: vpsrlq %xmm1, %zmm0, %zmm0
+; ALL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
+; ALL-NEXT: vpbroadcastq %xmm1, %zmm1
+; ALL-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%mod = and <8 x i64> %b, <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>
%splat = shufflevector <8 x i64> %mod, <8 x i64> undef, <8 x i32> zeroinitializer
@@ -200,8 +203,10 @@ define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwi
define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_modulo_shift_v16i32:
; ALL: # %bb.0:
-; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; ALL-NEXT: vpsrld %xmm1, %zmm0, %zmm0
+; ALL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
+; ALL-NEXT: vpbroadcastd %xmm1, %zmm1
+; ALL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%mod = and <16 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
%splat = shufflevector <16 x i32> %mod, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -222,7 +227,8 @@ define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) no
; AVX512BW-LABEL: splatvar_modulo_shift_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %zmm1
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%mod = and <32 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%splat = shufflevector <32 x i16> %mod, <32 x i16> undef, <32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
index 74ba1d04161f87..108e383dd7a029 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
@@ -933,28 +933,40 @@ define <2 x i32> @splatvar_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
; SSE41-NEXT: psrld %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: splatvar_shift_v2i32:
-; AVX: # %bb.0:
-; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX-NEXT: vpsrld %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_shift_v2i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
;
-; XOP-LABEL: splatvar_shift_v2i32:
-; XOP: # %bb.0:
-; XOP-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; XOP-NEXT: vpsrld %xmm1, %xmm0, %xmm0
-; XOP-NEXT: retq
+; AVX2-LABEL: splatvar_shift_v2i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_shift_v2i32:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_shift_v2i32:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v2i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v2i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512VL-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512VL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v2i32:
@@ -994,17 +1006,32 @@ define <4 x i16> @splatvar_shift_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
; XOP-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512-LABEL: splatvar_shift_v4i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_shift_v4i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQ-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_shift_v4i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_shift_v4i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_shift_v4i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQVL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_shift_v4i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v4i16:
; X86-SSE: # %bb.0:
@@ -1043,17 +1070,32 @@ define <2 x i16> @splatvar_shift_v2i16(<2 x i16> %a, <2 x i16> %b) nounwind {
; XOP-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512-LABEL: splatvar_shift_v2i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_shift_v2i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQ-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_shift_v2i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_shift_v2i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_shift_v2i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQVL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_shift_v2i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v2i16:
; X86-SSE: # %bb.0:
@@ -1131,18 +1173,20 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v8i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQ-NEXT: vpsrld %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQ-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v8i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -1150,18 +1194,20 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v8i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpsrld %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQVL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v8i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BWVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BWVL-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -1249,18 +1295,20 @@ define <4 x i8> @splatvar_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v4i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQ-NEXT: vpsrld %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQ-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v4i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -1268,18 +1316,20 @@ define <4 x i8> @splatvar_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v4i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpsrld %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQVL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v4i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BWVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BWVL-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -1358,18 +1408,20 @@ define <2 x i8> @splatvar_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v2i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQ-NEXT: vpsrld %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQ-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v2i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -1377,18 +1429,20 @@ define <2 x i8> @splatvar_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v2i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpsrld %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQVL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v2i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BWVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BWVL-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
index c54da38ef10cc1..4b3b35e9fee875 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
@@ -431,24 +431,38 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE-NEXT: psllq %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: splatvar_shift_v2i64:
-; AVX: # %bb.0:
-; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_shift_v2i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
;
-; XOP-LABEL: splatvar_shift_v2i64:
-; XOP: # %bb.0:
-; XOP-NEXT: vpsllq %xmm1, %xmm0, %xmm0
-; XOP-NEXT: retq
+; AVX2-LABEL: splatvar_shift_v2i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX2-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_shift_v2i64:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_shift_v2i64:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastq %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v2i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX512-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v2i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX512VL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v2i64:
@@ -474,28 +488,40 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE41-NEXT: pslld %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: splatvar_shift_v4i32:
-; AVX: # %bb.0:
-; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX-NEXT: vpslld %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_shift_v4i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
;
-; XOP-LABEL: splatvar_shift_v4i32:
-; XOP: # %bb.0:
-; XOP-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; XOP-NEXT: vpslld %xmm1, %xmm0, %xmm0
-; XOP-NEXT: retq
+; AVX2-LABEL: splatvar_shift_v4i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_shift_v4i32:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_shift_v4i32:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v4i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v4i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512VL-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512VL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v4i32:
@@ -535,17 +561,32 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; XOP-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512-LABEL: splatvar_shift_v8i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_shift_v8i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQ-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_shift_v8i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_shift_v8i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_shift_v8i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQVL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_shift_v8i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpsllvw %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v8i16:
; X86-SSE: # %bb.0:
@@ -619,18 +660,20 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v16i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQ-NEXT: vpslld %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v16i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -638,18 +681,20 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v16i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpslld %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v16i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BWVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -682,28 +727,44 @@ define <2 x i64> @splatvar_modulo_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwi
; SSE-NEXT: psllq %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: splatvar_modulo_shift_v2i64:
-; AVX: # %bb.0:
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_modulo_shift_v2i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
;
-; XOP-LABEL: splatvar_modulo_shift_v2i64:
-; XOP: # %bb.0:
-; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOP-NEXT: vpsllq %xmm1, %xmm0, %xmm0
-; XOP-NEXT: retq
+; AVX2-LABEL: splatvar_modulo_shift_v2i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_modulo_shift_v2i64:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_modulo_shift_v2i64:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastq %xmm1, %xmm1
+; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v2i64:
; AVX512: # %bb.0:
+; AVX512-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v2i64:
; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
-; AVX512VL-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_modulo_shift_v2i64:
@@ -724,28 +785,47 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
; SSE-NEXT: pslld %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: splatvar_modulo_shift_v4i32:
-; AVX: # %bb.0:
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-NEXT: vpslld %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_modulo_shift_v4i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
;
-; XOP-LABEL: splatvar_modulo_shift_v4i32:
-; XOP: # %bb.0:
-; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOP-NEXT: vpslld %xmm1, %xmm0, %xmm0
-; XOP-NEXT: retq
+; AVX2-LABEL: splatvar_modulo_shift_v4i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_modulo_shift_v4i32:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_modulo_shift_v4i32:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v4i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v4i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512VL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_modulo_shift_v4i32:
@@ -778,17 +858,34 @@ define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwi
; XOP-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512-LABEL: splatvar_modulo_shift_v8i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQ-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_modulo_shift_v8i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpsllvw %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_modulo_shift_v8i16:
; X86-SSE: # %bb.0:
@@ -863,20 +960,22 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
;
; AVX512DQ-LABEL: splatvar_modulo_shift_v16i8:
; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512DQ-NEXT: vpslld %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_modulo_shift_v16i8:
; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -884,20 +983,22 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
;
; AVX512DQVL-LABEL: splatvar_modulo_shift_v16i8:
; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512DQVL-NEXT: vpslld %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_modulo_shift_v16i8:
; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BWVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
index 6dde209e94d811..9f0b8a079517bd 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
@@ -464,7 +464,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; AVX2-LABEL: splatvar_shift_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v4i64:
@@ -477,17 +478,20 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; XOPAVX2-LABEL: splatvar_shift_v4i64:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; XOPAVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v4i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX512-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v4i64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX512VL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_shift_v4i64:
@@ -500,7 +504,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; X86-AVX2-LABEL: splatvar_shift_v4i64:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; X86-AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
%shift = shl <4 x i64> %a, %splat
@@ -519,8 +524,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; AVX2-LABEL: splatvar_shift_v8i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_shift_v8i32:
@@ -534,20 +539,20 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; XOPAVX2-LABEL: splatvar_shift_v8i32:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; XOPAVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; XOPAVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v8i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX512-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v8i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512VL-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX512VL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_shift_v8i32:
@@ -561,8 +566,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; X86-AVX2-LABEL: splatvar_shift_v8i32:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; X86-AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; X86-AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
%shift = shl <8 x i32> %a, %splat
@@ -600,17 +605,31 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
; XOPAVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
-; AVX512-LABEL: splatvar_shift_v16i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_shift_v16i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_shift_v16i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_shift_v16i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_shift_v16i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_shift_v16i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_shift_v16i16:
; X86-AVX1: # %bb.0:
@@ -688,9 +707,10 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
;
; AVX512BW-LABEL: splatvar_shift_v32i8:
; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
;
@@ -706,9 +726,10 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
;
; AVX512BWVL-LABEL: splatvar_shift_v32i8:
; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BWVL-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512BWVL-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT: retq
;
@@ -757,8 +778,10 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
;
; AVX2-LABEL: splatvar_modulo_shift_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v4i64:
@@ -772,20 +795,25 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
;
; XOPAVX2-LABEL: splatvar_modulo_shift_v4i64:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; XOPAVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v4i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX512-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
-; AVX512VL-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX512VL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v4i64:
@@ -800,7 +828,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
; X86-AVX2-LABEL: splatvar_modulo_shift_v4i64:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
-; X86-AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
+; X86-AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <4 x i64> %b, <i64 63, i64 63, i64 63, i64 63>
%splat = shufflevector <4 x i64> %mod, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -820,8 +849,10 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; AVX2-LABEL: splatvar_modulo_shift_v8i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v8i32:
@@ -835,20 +866,25 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; XOPAVX2-LABEL: splatvar_modulo_shift_v8i32:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; XOPAVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v8i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX512-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v8i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512VL-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX512VL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v8i32:
@@ -862,8 +898,10 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; X86-AVX2-LABEL: splatvar_modulo_shift_v8i32:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
-; X86-AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; X86-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; X86-AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; X86-AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
%splat = shufflevector <8 x i32> %mod, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -902,17 +940,33 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; XOPAVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
-; AVX512-LABEL: splatvar_modulo_shift_v16i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_modulo_shift_v16i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v16i16:
; X86-AVX1: # %bb.0:
@@ -993,10 +1047,11 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
;
; AVX512BW-LABEL: splatvar_modulo_shift_v32i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
;
@@ -1012,10 +1067,11 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
;
; AVX512BWVL-LABEL: splatvar_modulo_shift_v32i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BWVL-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512BWVL-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-512.ll b/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
index 8eca56d099feb3..bc03821cea22e1 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
@@ -107,7 +107,8 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_shift_v8i64:
; ALL: # %bb.0:
-; ALL-NEXT: vpsllq %xmm1, %zmm0, %zmm0
+; ALL-NEXT: vpbroadcastq %xmm1, %zmm1
+; ALL-NEXT: vpsllvq %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
%shift = shl <8 x i64> %a, %splat
@@ -117,8 +118,8 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_shift_v16i32:
; ALL: # %bb.0:
-; ALL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; ALL-NEXT: vpslld %xmm1, %zmm0, %zmm0
+; ALL-NEXT: vpbroadcastd %xmm1, %zmm1
+; ALL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
%shift = shl <16 x i32> %a, %splat
@@ -137,8 +138,8 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind
;
; AVX512BW-LABEL: splatvar_shift_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %zmm1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
%shift = shl <32 x i16> %a, %splat
@@ -181,8 +182,10 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_modulo_shift_v8i64:
; ALL: # %bb.0:
-; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; ALL-NEXT: vpsllq %xmm1, %zmm0, %zmm0
+; ALL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
+; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
+; ALL-NEXT: vpbroadcastq %xmm1, %zmm1
+; ALL-NEXT: vpsllvq %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%mod = and <8 x i64> %b, <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>
%splat = shufflevector <8 x i64> %mod, <8 x i64> undef, <8 x i32> zeroinitializer
@@ -193,8 +196,10 @@ define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwi
define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_modulo_shift_v16i32:
; ALL: # %bb.0:
-; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; ALL-NEXT: vpslld %xmm1, %zmm0, %zmm0
+; ALL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
+; ALL-NEXT: vpbroadcastd %xmm1, %zmm1
+; ALL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%mod = and <16 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
%splat = shufflevector <16 x i32> %mod, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -215,7 +220,8 @@ define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) no
; AVX512BW-LABEL: splatvar_modulo_shift_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %zmm1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%mod = and <32 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%splat = shufflevector <32 x i16> %mod, <32 x i16> undef, <32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll
index d545cb77cba2e4..08150335b63bb3 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll
@@ -786,28 +786,40 @@ define <2 x i32> @splatvar_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
; SSE41-NEXT: pslld %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: splatvar_shift_v2i32:
-; AVX: # %bb.0:
-; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX-NEXT: vpslld %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatvar_shift_v2i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
;
-; XOP-LABEL: splatvar_shift_v2i32:
-; XOP: # %bb.0:
-; XOP-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; XOP-NEXT: vpslld %xmm1, %xmm0, %xmm0
-; XOP-NEXT: retq
+; AVX2-LABEL: splatvar_shift_v2i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_shift_v2i32:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_shift_v2i32:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_shift_v2i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v2i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX512VL-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX512VL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v2i32:
@@ -847,17 +859,32 @@ define <4 x i16> @splatvar_shift_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
; XOP-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512-LABEL: splatvar_shift_v4i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_shift_v4i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQ-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_shift_v4i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_shift_v4i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_shift_v4i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQVL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_shift_v4i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpsllvw %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v4i16:
; X86-SSE: # %bb.0:
@@ -896,17 +923,32 @@ define <2 x i16> @splatvar_shift_v2i16(<2 x i16> %a, <2 x i16> %b) nounwind {
; XOP-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
-; AVX512-LABEL: splatvar_shift_v2i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512DQ-LABEL: splatvar_shift_v2i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQ-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512DQ-NEXT: retq
;
-; AVX512VL-LABEL: splatvar_shift_v2i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; AVX512BW-LABEL: splatvar_shift_v2i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_shift_v2i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQVL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_shift_v2i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpsllvw %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v2i16:
; X86-SSE: # %bb.0:
@@ -980,18 +1022,20 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v8i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQ-NEXT: vpslld %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v8i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -999,18 +1043,20 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v8i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpslld %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v8i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BWVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -1093,18 +1139,20 @@ define <4 x i8> @splatvar_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v4i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQ-NEXT: vpslld %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v4i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -1112,18 +1160,20 @@ define <4 x i8> @splatvar_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v4i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpslld %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v4i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BWVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -1199,18 +1249,20 @@ define <2 x i8> @splatvar_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v2i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQ-NEXT: vpslld %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v2i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
@@ -1218,18 +1270,20 @@ define <2 x i8> @splatvar_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
;
; AVX512DQVL-LABEL: splatvar_shift_v2i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQVL-NEXT: vpslld %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_shift_v2i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BWVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vselect-avx.ll b/llvm/test/CodeGen/X86/vselect-avx.ll
index 8dda27145bd374..ee3c0da2553769 100644
--- a/llvm/test/CodeGen/X86/vselect-avx.ll
+++ b/llvm/test/CodeGen/X86/vselect-avx.ll
@@ -227,10 +227,10 @@ define void @blendv_split(ptr %p, <8 x i32> %cond, <8 x i32> %a, <8 x i32> %x, <
;
; AVX2-LABEL: blendv_split:
; AVX2: ## %bb.0:
-; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX2-NEXT: vpslld %xmm2, %ymm1, %ymm2
-; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero
-; AVX2-NEXT: vpslld %xmm3, %ymm1, %ymm1
+; AVX2-NEXT: vpbroadcastd %xmm2, %ymm2
+; AVX2-NEXT: vpbroadcastd %xmm3, %ymm3
+; AVX2-NEXT: vpsllvd %ymm2, %ymm1, %ymm2
+; AVX2-NEXT: vpsllvd %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT: vmovups %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
@@ -241,10 +241,10 @@ define void @blendv_split(ptr %p, <8 x i32> %cond, <8 x i32> %a, <8 x i32> %x, <
; AVX512-NEXT: vpsrld $31, %ymm0, %ymm0
; AVX512-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512-NEXT: vptestmd %ymm0, %ymm0, %k1
-; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero
-; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero
-; AVX512-NEXT: vpslld %xmm2, %ymm1, %ymm2
-; AVX512-NEXT: vpslld %xmm0, %ymm1, %ymm2 {%k1}
+; AVX512-NEXT: vpbroadcastd %xmm2, %ymm0
+; AVX512-NEXT: vpbroadcastd %xmm3, %ymm2
+; AVX512-NEXT: vpsllvd %ymm2, %ymm1, %ymm2
+; AVX512-NEXT: vpsllvd %ymm0, %ymm1, %ymm2 {%k1}
; AVX512-NEXT: vmovdqu %ymm2, (%rdi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
More information about the llvm-commits
mailing list