[llvm] 5fedbd5 - [DAG] SimplifyDemandedVectorElts - zero_extend_vector_inreg(and(x,c)) -> and(x,c')
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 17 04:42:29 PST 2021
Author: Simon Pilgrim
Date: 2021-11-17T12:41:48Z
New Revision: 5fedbd5b181523541331eee3008467a39f0334cb
URL: https://github.com/llvm/llvm-project/commit/5fedbd5b181523541331eee3008467a39f0334cb
DIFF: https://github.com/llvm/llvm-project/commit/5fedbd5b181523541331eee3008467a39f0334cb.diff
LOG: [DAG] SimplifyDemandedVectorElts - zero_extend_vector_inreg(and(x,c)) -> and(x,c')
If only the 0th element is demanded, and it comes from a (one-use) AND, try to convert the zero_extend_vector_inreg into a mask and constant-fold it with the AND.
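A sketch of the fold in illustrative SelectionDAG notation (types taken from the v4i32 cases updated below):

    t1: v4i32 = and X, BuildVector<31,31,31,31>
    t2: v2i64 = zero_extend_vector_inreg t1    ; only element 0 demanded

becomes

    t1: v4i32 = and X, BuildVector<31,0,0,0>
    t2: v2i64 = bitcast t1

The new mask is the old constant ANDed with <all-ones,0,0,...,0>, so the demanded lane keeps its bits and the zeroed neighbouring lane provides the extension.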
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/X86/vector-fshl-128.ll
llvm/test/CodeGen/X86/vector-fshl-256.ll
llvm/test/CodeGen/X86/vector-fshl-512.ll
llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
llvm/test/CodeGen/X86/vector-fshr-128.ll
llvm/test/CodeGen/X86/vector-fshr-256.ll
llvm/test/CodeGen/X86/vector-fshr-512.ll
llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
llvm/test/CodeGen/X86/vector-shift-shl-128.ll
llvm/test/CodeGen/X86/vector-shift-shl-512.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 44cbb2b45055..f4e1fe25f536 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2809,6 +2809,25 @@ bool TargetLowering::SimplifyDemandedVectorElts(
if (DemandedElts.isSubsetOf(KnownUndef))
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
KnownUndef.clearAllBits();
+
+ // zext - if we just need the bottom element then we can mask:
+ // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
+ if (DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian() &&
+ Src.getOpcode() == ISD::AND && Op->isOnlyUserOf(Src.getNode()) &&
+ Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
+ SDLoc DL(Op);
+ EVT SrcVT = Src.getValueType();
+ EVT SrcSVT = SrcVT.getScalarType();
+ SmallVector<SDValue> MaskElts;
+ MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
+ MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
+ SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
+ if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
+ ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
+ Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
+ return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
+ }
+ }
}
break;
}
diff --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll
index 20022c89edfb..0b4edf10cd9e 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll
@@ -1205,79 +1205,56 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
;
; SSE41-LABEL: splatvar_funnnel_v4i32:
; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [31,31,31,31]
+; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [31,0,0,0]
; SSE41-NEXT: movdqa %xmm2, %xmm4
; SSE41-NEXT: pandn %xmm3, %xmm4
-; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
; SSE41-NEXT: psrld $1, %xmm1
; SSE41-NEXT: psrld %xmm4, %xmm1
; SSE41-NEXT: pand %xmm3, %xmm2
-; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
; SSE41-NEXT: pslld %xmm2, %xmm0
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX1-LABEL: splatvar_funnnel_v4i32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [31,31,31,31]
-; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
-; AVX1-NEXT: vpsrld $1, %xmm1, %xmm1
-; AVX1-NEXT: vpsrld %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpslld %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: splatvar_funnnel_v4i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
-; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
-; AVX2-NEXT: vpsrld $1, %xmm1, %xmm1
-; AVX2-NEXT: vpsrld %xmm4, %xmm1, %xmm1
-; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX2-NEXT: vpslld %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
+; AVX-LABEL: splatvar_funnnel_v4i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [31,0,0,0]
+; AVX-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX-NEXT: vpsrld $1, %xmm1, %xmm1
+; AVX-NEXT: vpsrld %xmm4, %xmm1, %xmm1
+; AVX-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vpslld %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v4i32:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [31,0,0,0]
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
; AVX512F-NEXT: vpsrld $1, %xmm1, %xmm1
; AVX512F-NEXT: vpsrld %xmm4, %xmm1, %xmm1
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
; AVX512F-NEXT: vpslld %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v4i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [31,0,0,0]
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
; AVX512VL-NEXT: vpsrld $1, %xmm1, %xmm1
; AVX512VL-NEXT: vpsrld %xmm4, %xmm1, %xmm1
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
; AVX512VL-NEXT: vpslld %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v4i32:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [31,0,0,0]
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512BW-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
; AVX512BW-NEXT: vpsrld $1, %xmm1, %xmm1
; AVX512BW-NEXT: vpsrld %xmm4, %xmm1, %xmm1
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
; AVX512BW-NEXT: vpslld %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: retq
@@ -1294,13 +1271,11 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v4i32:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [31,0,0,0]
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512VLBW-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
; AVX512VLBW-NEXT: vpsrld $1, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpsrld %xmm4, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VLBW-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
; AVX512VLBW-NEXT: vpslld %xmm2, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
@@ -1311,31 +1286,16 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
; AVX512VLVBMI2-NEXT: vpshldvd %xmm2, %xmm1, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
-; XOPAVX1-LABEL: splatvar_funnnel_v4i32:
-; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [31,31,31,31]
-; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
-; XOPAVX1-NEXT: vpsrld $1, %xmm1, %xmm1
-; XOPAVX1-NEXT: vpsrld %xmm4, %xmm1, %xmm1
-; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
-; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; XOPAVX1-NEXT: vpslld %xmm2, %xmm0, %xmm0
-; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
-; XOPAVX1-NEXT: retq
-;
-; XOPAVX2-LABEL: splatvar_funnnel_v4i32:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
-; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
-; XOPAVX2-NEXT: vpsrld $1, %xmm1, %xmm1
-; XOPAVX2-NEXT: vpsrld %xmm4, %xmm1, %xmm1
-; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
-; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; XOPAVX2-NEXT: vpslld %xmm2, %xmm0, %xmm0
-; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
-; XOPAVX2-NEXT: retq
+; XOP-LABEL: splatvar_funnnel_v4i32:
+; XOP: # %bb.0:
+; XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [31,0,0,0]
+; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; XOP-NEXT: vpsrld $1, %xmm1, %xmm1
+; XOP-NEXT: vpsrld %xmm4, %xmm1, %xmm1
+; XOP-NEXT: vpand %xmm3, %xmm2, %xmm2
+; XOP-NEXT: vpslld %xmm2, %xmm0, %xmm0
+; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
;
; X86-SSE2-LABEL: splatvar_funnnel_v4i32:
; X86-SSE2: # %bb.0:
@@ -1375,66 +1335,56 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; SSE41-LABEL: splatvar_funnnel_v8i16:
; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [15,0,0,0]
; SSE41-NEXT: movdqa %xmm2, %xmm4
; SSE41-NEXT: pandn %xmm3, %xmm4
-; SSE41-NEXT: pmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; SSE41-NEXT: psrlw $1, %xmm1
; SSE41-NEXT: psrlw %xmm4, %xmm1
; SSE41-NEXT: pand %xmm3, %xmm2
-; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; SSE41-NEXT: psllw %xmm2, %xmm0
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: splatvar_funnnel_v8i16:
; AVX: # %bb.0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v8i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512F-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX512F-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512F-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v8i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX512VL-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v8i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX512BW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: retq
@@ -1451,13 +1401,11 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
@@ -1470,13 +1418,11 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; XOP-LABEL: splatvar_funnnel_v8i16:
; XOP: # %bb.0:
-; XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; XOP-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; XOP-NEXT: vpsrlw $1, %xmm1, %xmm1
; XOP-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; XOP-NEXT: vpand %xmm3, %xmm2, %xmm2
-; XOP-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; XOP-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
@@ -1534,45 +1480,43 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
;
; SSE41-LABEL: splatvar_funnnel_v16i8:
; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; SSE41-NEXT: movdqa %xmm2, %xmm4
-; SSE41-NEXT: pandn %xmm3, %xmm4
-; SSE41-NEXT: pmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: pxor %xmm3, %xmm3
+; SSE41-NEXT: pshufb %xmm3, %xmm2
+; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [7,0,0,0]
+; SSE41-NEXT: movdqa %xmm2, %xmm5
+; SSE41-NEXT: pandn %xmm4, %xmm5
; SSE41-NEXT: psrlw $1, %xmm1
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT: psrlw %xmm4, %xmm1
-; SSE41-NEXT: pcmpeqd %xmm5, %xmm5
+; SSE41-NEXT: psrlw %xmm5, %xmm1
; SSE41-NEXT: pcmpeqd %xmm6, %xmm6
-; SSE41-NEXT: psrlw %xmm4, %xmm6
-; SSE41-NEXT: pshufb {{.*#+}} xmm6 = xmm6[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; SSE41-NEXT: pand %xmm1, %xmm6
-; SSE41-NEXT: pand %xmm3, %xmm2
-; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
-; SSE41-NEXT: psllw %xmm1, %xmm0
-; SSE41-NEXT: psllw %xmm1, %xmm5
-; SSE41-NEXT: pxor %xmm1, %xmm1
-; SSE41-NEXT: pshufb %xmm1, %xmm5
-; SSE41-NEXT: pand %xmm5, %xmm0
-; SSE41-NEXT: por %xmm6, %xmm0
+; SSE41-NEXT: pcmpeqd %xmm7, %xmm7
+; SSE41-NEXT: psrlw %xmm5, %xmm7
+; SSE41-NEXT: pshufb {{.*#+}} xmm7 = xmm7[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; SSE41-NEXT: pand %xmm1, %xmm7
+; SSE41-NEXT: pand %xmm4, %xmm2
+; SSE41-NEXT: psllw %xmm2, %xmm0
+; SSE41-NEXT: psllw %xmm2, %xmm6
+; SSE41-NEXT: pshufb %xmm3, %xmm6
+; SSE41-NEXT: pand %xmm6, %xmm0
+; SSE41-NEXT: por %xmm7, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: splatvar_funnnel_v16i8:
; AVX1: # %bb.0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [7,0,0,0]
+; AVX1-NEXT: vpandn %xmm4, %xmm2, %xmm5
; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
-; AVX1-NEXT: vpsrlw %xmm4, %xmm5, %xmm4
-; AVX1-NEXT: vpshufb {{.*#+}} xmm4 = xmm4[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vpsrlw %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpcmpeqd %xmm6, %xmm6, %xmm6
+; AVX1-NEXT: vpsrlw %xmm5, %xmm6, %xmm5
+; AVX1-NEXT: vpshufb {{.*#+}} xmm5 = xmm5[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX1-NEXT: vpand %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpsllw %xmm2, %xmm5, %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpsllw %xmm2, %xmm6, %xmm2
; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -1580,16 +1524,15 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
;
; AVX2-LABEL: splatvar_funnnel_v16i8:
; AVX2: # %bb.0:
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX2-NEXT: vpbroadcastb %xmm2, %xmm2
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,0,0,0]
; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpsllw %xmm4, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX2-NEXT: vpsllw %xmm4, %xmm5, %xmm4
; AVX2-NEXT: vpbroadcastb %xmm4, %xmm4
; AVX2-NEXT: vpand %xmm4, %xmm0, %xmm0
; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpsrlw %xmm2, %xmm1, %xmm1
@@ -1602,13 +1545,11 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
;
; AVX512F-LABEL: splatvar_funnnel_v16i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [7,0,0,0]
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512F-NEXT: vpslld %xmm4, %zmm0, %zmm0
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
@@ -1620,13 +1561,11 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
;
; AVX512VL-LABEL: splatvar_funnnel_v16i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [7,0,0,0]
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VL-NEXT: vpslld %xmm4, %zmm0, %zmm0
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
@@ -1638,13 +1577,11 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
;
; AVX512BW-LABEL: splatvar_funnnel_v16i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,0,0,0]
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512BW-NEXT: vpsllw %xmm4, %ymm0, %ymm0
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
@@ -1657,13 +1594,11 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v16i8:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,0,0,0]
; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512VBMI2-NEXT: vpsllw %xmm4, %ymm0, %ymm0
; AVX512VBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VBMI2-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
@@ -1676,13 +1611,11 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,0,0,0]
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512VLBW-NEXT: vpsllw %xmm4, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
@@ -1694,13 +1627,11 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
;
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v16i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,0,0,0]
; AVX512VLVBMI2-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512VLVBMI2-NEXT: vpsllw %xmm4, %ymm0, %ymm0
; AVX512VLVBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLVBMI2-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll
index c644c749c693..5f9357a33932 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll
@@ -1252,20 +1252,18 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
;
; AVX2-LABEL: splatvar_funnnel_v32i8:
; AVX2: # %bb.0:
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
-; AVX2-NEXT: vpsllw %xmm4, %ymm0, %ymm0
-; AVX2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
-; AVX2-NEXT: vpsllw %xmm4, %xmm5, %xmm4
-; AVX2-NEXT: vpbroadcastb %xmm4, %ymm4
-; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0
-; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; AVX2-NEXT: vpsllw %xmm3, %ymm0, %ymm0
+; AVX2-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX2-NEXT: vpsllw %xmm3, %xmm4, %xmm3
+; AVX2-NEXT: vpbroadcastb %xmm3, %ymm3
+; AVX2-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX2-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
-; AVX2-NEXT: vpsrlw %xmm2, %xmm5, %xmm2
+; AVX2-NEXT: vpsrlw %xmm2, %xmm4, %xmm2
; AVX2-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
@@ -1274,20 +1272,18 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
;
; AVX512F-LABEL: splatvar_funnnel_v32i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512F-NEXT: vpsllw %xmm4, %ymm0, %ymm0
-; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
-; AVX512F-NEXT: vpsllw %xmm4, %xmm5, %xmm4
-; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4
-; AVX512F-NEXT: vpand %ymm4, %ymm0, %ymm0
-; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; AVX512F-NEXT: vpsllw %xmm3, %ymm0, %ymm0
+; AVX512F-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX512F-NEXT: vpsllw %xmm3, %xmm4, %xmm3
+; AVX512F-NEXT: vpbroadcastb %xmm3, %ymm3
+; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX512F-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
-; AVX512F-NEXT: vpsrlw %xmm2, %xmm5, %xmm2
+; AVX512F-NEXT: vpsrlw %xmm2, %xmm4, %xmm2
; AVX512F-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2
; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
@@ -1296,89 +1292,79 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
;
; AVX512VL-LABEL: splatvar_funnnel_v32i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
-; AVX512VL-NEXT: vpsllw %xmm4, %xmm5, %xmm4
-; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4
-; AVX512VL-NEXT: vpand %ymm4, %ymm0, %ymm4
-; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm0
-; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512VL-NEXT: vpsrlw $1, %ymm1, %ymm1
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; AVX512VL-NEXT: vpsrlw %xmm0, %ymm1, %ymm1
-; AVX512VL-NEXT: vpsrlw %xmm0, %xmm5, %xmm0
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX512VL-NEXT: vpsllw %xmm3, %xmm4, %xmm3
+; AVX512VL-NEXT: vpbroadcastb %xmm3, %ymm3
+; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm3
+; AVX512VL-NEXT: vpsrlw $1, %ymm1, %ymm0
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512VL-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm1
+; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm2
+; AVX512VL-NEXT: vpsrlw %xmm1, %xmm4, %xmm0
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512VL-NEXT: vpternlogq $236, %ymm1, %ymm4, %ymm0
+; AVX512VL-NEXT: vpternlogq $236, %ymm2, %ymm3, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v32i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; AVX512BW-NEXT: vpsllw %xmm3, %zmm0, %zmm0
+; AVX512BW-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
-; AVX512BW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
-; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
-; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlw %xmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v32i8:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX512VBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; AVX512VBMI2-NEXT: vpsllw %xmm3, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VBMI2-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
-; AVX512VBMI2-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
-; AVX512VBMI2-NEXT: vpsllw %xmm2, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: vpsrlw %xmm2, %zmm1, %zmm1
; AVX512VBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT: vpmovwb %zmm0, %ymm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; AVX512VLBW-NEXT: vpsllw %xmm3, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
-; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
-; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
-; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpsrlw %xmm2, %zmm1, %zmm1
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v32i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX512VLVBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; AVX512VLVBMI2-NEXT: vpsllw %xmm3, %zmm0, %zmm0
+; AVX512VLVBMI2-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLVBMI2-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
-; AVX512VLVBMI2-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
-; AVX512VLVBMI2-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
-; AVX512VLVBMI2-NEXT: vpsllw %xmm2, %zmm0, %zmm0
+; AVX512VLVBMI2-NEXT: vpsrlw %xmm2, %zmm1, %zmm1
; AVX512VLVBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLVBMI2-NEXT: vpmovwb %zmm0, %ymm0
; AVX512VLVBMI2-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-fshl-512.ll b/llvm/test/CodeGen/X86/vector-fshl-512.ll
index 1227bf7dc1d3..143cbf9fa1b4 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-512.ll
@@ -749,150 +749,138 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v64i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm3
-; AVX512F-NEXT: vpsrlw $1, %ymm3, %ymm3
-; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; AVX512F-NEXT: vpand %ymm4, %ymm3, %ymm3
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm5 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX512F-NEXT: vpandn %xmm5, %xmm2, %xmm6
-; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,zero,zero,zero,zero,xmm6[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512F-NEXT: vpsrlw %xmm6, %ymm3, %ymm3
-; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1
-; AVX512F-NEXT: vpand %ymm4, %ymm1, %ymm1
-; AVX512F-NEXT: vpsrlw %xmm6, %ymm1, %ymm1
-; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
-; AVX512F-NEXT: vpand %xmm5, %xmm2, %xmm2
-; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
-; AVX512F-NEXT: vpsllw %xmm2, %ymm3, %ymm3
-; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm0
+; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm4
+; AVX512F-NEXT: vpsllw %xmm4, %ymm3, %ymm3
+; AVX512F-NEXT: vpsllw %xmm4, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
; AVX512F-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
-; AVX512F-NEXT: vpsllw %xmm2, %xmm3, %xmm2
-; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2
-; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
-; AVX512F-NEXT: vpandq %zmm2, %zmm0, %zmm2
-; AVX512F-NEXT: vpsrlw %xmm6, %xmm3, %xmm0
+; AVX512F-NEXT: vpsllw %xmm4, %xmm3, %xmm4
+; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4
+; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4
+; AVX512F-NEXT: vpandq %zmm4, %zmm0, %zmm4
+; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm0
+; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm0
+; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; AVX512F-NEXT: vpand %ymm5, %ymm0, %ymm0
+; AVX512F-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512F-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
+; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1
+; AVX512F-NEXT: vpand %ymm5, %ymm1, %ymm1
+; AVX512F-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
+; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm1
+; AVX512F-NEXT: vpsrlw %xmm2, %xmm3, %xmm0
; AVX512F-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
-; AVX512F-NEXT: vpternlogq $234, %zmm2, %zmm1, %zmm0
+; AVX512F-NEXT: vpternlogq $236, %zmm1, %zmm4, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v64i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm3
-; AVX512VL-NEXT: vpsrlw $1, %ymm3, %ymm3
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; AVX512VL-NEXT: vpand %ymm4, %ymm3, %ymm3
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm5 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX512VL-NEXT: vpandn %xmm5, %xmm2, %xmm6
-; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,zero,zero,zero,zero,xmm6[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512VL-NEXT: vpsrlw %xmm6, %ymm3, %ymm3
-; AVX512VL-NEXT: vpsrlw $1, %ymm1, %ymm1
-; AVX512VL-NEXT: vpand %ymm4, %ymm1, %ymm1
-; AVX512VL-NEXT: vpsrlw %xmm6, %ymm1, %ymm1
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
-; AVX512VL-NEXT: vpand %xmm5, %xmm2, %xmm2
-; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
-; AVX512VL-NEXT: vpsllw %xmm2, %ymm3, %ymm3
-; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm4
+; AVX512VL-NEXT: vpsllw %xmm4, %ymm3, %ymm3
+; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
; AVX512VL-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
-; AVX512VL-NEXT: vpsllw %xmm2, %xmm3, %xmm2
-; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
-; AVX512VL-NEXT: vpandq %zmm2, %zmm0, %zmm2
-; AVX512VL-NEXT: vpsrlw %xmm6, %xmm3, %xmm0
+; AVX512VL-NEXT: vpsllw %xmm4, %xmm3, %xmm4
+; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4
+; AVX512VL-NEXT: vpandq %zmm4, %zmm0, %zmm4
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm0
+; AVX512VL-NEXT: vpsrlw $1, %ymm0, %ymm0
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; AVX512VL-NEXT: vpand %ymm5, %ymm0, %ymm0
+; AVX512VL-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vpsrlw $1, %ymm1, %ymm1
+; AVX512VL-NEXT: vpand %ymm5, %ymm1, %ymm1
+; AVX512VL-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm1
+; AVX512VL-NEXT: vpsrlw %xmm2, %xmm3, %xmm0
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
-; AVX512VL-NEXT: vpternlogq $234, %zmm2, %zmm1, %zmm0
+; AVX512VL-NEXT: vpternlogq $236, %zmm1, %zmm4, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v64i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpsllw %xmm4, %zmm0, %zmm0
-; AVX512BW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
-; AVX512BW-NEXT: vpsllw %xmm4, %xmm5, %xmm4
-; AVX512BW-NEXT: vpbroadcastb %xmm4, %zmm4
-; AVX512BW-NEXT: vpandq %zmm4, %zmm0, %zmm4
-; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm0
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpsrlw $1, %zmm1, %zmm1
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
-; AVX512BW-NEXT: vpsrlw %xmm0, %zmm1, %zmm1
-; AVX512BW-NEXT: vpsrlw %xmm0, %xmm5, %xmm0
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; AVX512BW-NEXT: vpsllw %xmm3, %zmm0, %zmm0
+; AVX512BW-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX512BW-NEXT: vpsllw %xmm3, %xmm4, %xmm3
+; AVX512BW-NEXT: vpbroadcastb %xmm3, %zmm3
+; AVX512BW-NEXT: vpandq %zmm3, %zmm0, %zmm3
+; AVX512BW-NEXT: vpsrlw $1, %zmm1, %zmm0
+; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm1
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm2
+; AVX512BW-NEXT: vpsrlw %xmm1, %xmm4, %xmm0
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX512BW-NEXT: vpbroadcastb %xmm0, %zmm0
-; AVX512BW-NEXT: vpternlogq $236, %zmm1, %zmm4, %zmm0
+; AVX512BW-NEXT: vpternlogq $236, %zmm2, %zmm3, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v64i8:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512VBMI2-NEXT: vpsllw %xmm4, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
-; AVX512VBMI2-NEXT: vpsllw %xmm4, %xmm5, %xmm4
-; AVX512VBMI2-NEXT: vpbroadcastb %xmm4, %zmm4
-; AVX512VBMI2-NEXT: vpandq %zmm4, %zmm0, %zmm4
-; AVX512VBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm0
-; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512VBMI2-NEXT: vpsrlw $1, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vpsrlw %xmm0, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vpsrlw %xmm0, %xmm5, %xmm0
+; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; AVX512VBMI2-NEXT: vpsllw %xmm3, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX512VBMI2-NEXT: vpsllw %xmm3, %xmm4, %xmm3
+; AVX512VBMI2-NEXT: vpbroadcastb %xmm3, %zmm3
+; AVX512VBMI2-NEXT: vpandq %zmm3, %zmm0, %zmm3
+; AVX512VBMI2-NEXT: vpsrlw $1, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512VBMI2-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm1
+; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512VBMI2-NEXT: vpsrlw %xmm1, %zmm0, %zmm2
+; AVX512VBMI2-NEXT: vpsrlw %xmm1, %xmm4, %xmm0
; AVX512VBMI2-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX512VBMI2-NEXT: vpbroadcastb %xmm0, %zmm0
-; AVX512VBMI2-NEXT: vpternlogq $236, %zmm1, %zmm4, %zmm0
+; AVX512VBMI2-NEXT: vpternlogq $236, %zmm2, %zmm3, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512VLBW-NEXT: vpsllw %xmm4, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
-; AVX512VLBW-NEXT: vpsllw %xmm4, %xmm5, %xmm4
-; AVX512VLBW-NEXT: vpbroadcastb %xmm4, %zmm4
-; AVX512VLBW-NEXT: vpandq %zmm4, %zmm0, %zmm4
-; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm0
-; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512VLBW-NEXT: vpsrlw $1, %zmm1, %zmm1
-; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
-; AVX512VLBW-NEXT: vpsrlw %xmm0, %zmm1, %zmm1
-; AVX512VLBW-NEXT: vpsrlw %xmm0, %xmm5, %xmm0
+; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; AVX512VLBW-NEXT: vpsllw %xmm3, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX512VLBW-NEXT: vpsllw %xmm3, %xmm4, %xmm3
+; AVX512VLBW-NEXT: vpbroadcastb %xmm3, %zmm3
+; AVX512VLBW-NEXT: vpandq %zmm3, %zmm0, %zmm3
+; AVX512VLBW-NEXT: vpsrlw $1, %zmm1, %zmm0
+; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm1
+; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm2
+; AVX512VLBW-NEXT: vpsrlw %xmm1, %xmm4, %xmm0
; AVX512VLBW-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpbroadcastb %xmm0, %zmm0
-; AVX512VLBW-NEXT: vpternlogq $236, %zmm1, %zmm4, %zmm0
+; AVX512VLBW-NEXT: vpternlogq $236, %zmm2, %zmm3, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v64i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX512VLVBMI2-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512VLVBMI2-NEXT: vpsllw %xmm4, %zmm0, %zmm0
-; AVX512VLVBMI2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
-; AVX512VLVBMI2-NEXT: vpsllw %xmm4, %xmm5, %xmm4
-; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm4, %zmm4
-; AVX512VLVBMI2-NEXT: vpandq %zmm4, %zmm0, %zmm4
-; AVX512VLVBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm0
-; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512VLVBMI2-NEXT: vpsrlw $1, %zmm1, %zmm1
-; AVX512VLVBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
-; AVX512VLVBMI2-NEXT: vpsrlw %xmm0, %zmm1, %zmm1
-; AVX512VLVBMI2-NEXT: vpsrlw %xmm0, %xmm5, %xmm0
+; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; AVX512VLVBMI2-NEXT: vpsllw %xmm3, %zmm0, %zmm0
+; AVX512VLVBMI2-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX512VLVBMI2-NEXT: vpsllw %xmm3, %xmm4, %xmm3
+; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm3, %zmm3
+; AVX512VLVBMI2-NEXT: vpandq %zmm3, %zmm0, %zmm3
+; AVX512VLVBMI2-NEXT: vpsrlw $1, %zmm1, %zmm0
+; AVX512VLVBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512VLVBMI2-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm1
+; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512VLVBMI2-NEXT: vpsrlw %xmm1, %zmm0, %zmm2
+; AVX512VLVBMI2-NEXT: vpsrlw %xmm1, %xmm4, %xmm0
; AVX512VLVBMI2-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm0, %zmm0
-; AVX512VLVBMI2-NEXT: vpternlogq $236, %zmm1, %zmm4, %zmm0
+; AVX512VLVBMI2-NEXT: vpternlogq $236, %zmm2, %zmm3, %zmm0
; AVX512VLVBMI2-NEXT: retq
%splat = shufflevector <64 x i8> %amt, <64 x i8> undef, <64 x i32> zeroinitializer
%res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %splat)
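A note on the pattern driving all of these test deltas: the x86 variable shift instructions (vpsrld, vpsllw, vpsrlw, ...) take their count from the low 64 bits of the xmm operand, so once the amount has been masked by an AND, only element 0 of that AND is demanded through the zero_extend_vector_inreg that widens it. Folding the extension's implicit keep-lane-0 mask into the AND constant turns a splat such as [31,31,31,31] into [31,0,0,0] and lets the separate vpmovzxdq/vpmovzxwq/vpmovzxbq instructions drop out. A minimal standalone sketch of that constant fold (plain C++ modelling the lane arithmetic; the function name is illustrative, this is not the DAG combine itself):

  #include <cstdint>
  #include <vector>

  // The zero-extension keeps lane 0 and zeroes every other lane, which is
  // equivalent to AND'ing with {0xFFFFFFFF, 0, 0, ...}; folding that into
  // the existing mask constant zeroes the upper lanes at compile time.
  std::vector<uint32_t> foldMaskForLaneZero(const std::vector<uint32_t> &SplatMask) {
    std::vector<uint32_t> Folded(SplatMask.size(), 0);
    if (!SplatMask.empty())
      Folded[0] = SplatMask[0] & UINT32_MAX;
    return Folded; // e.g. {31,31,31,31} -> {31,0,0,0}
  }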
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
index 5a8ca8d7cd0b..1be5d2e983bf 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
@@ -1209,15 +1209,13 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind
;
; AVX512F-LABEL: splatvar_funnnel_v16i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512F-NEXT: vpslld %xmm3, %zmm0, %zmm3
; AVX512F-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512F-NEXT: vpsubb %xmm1, %xmm4, %xmm1
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpsrld %xmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpord %zmm0, %zmm3, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
@@ -1226,15 +1224,13 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind
;
; AVX512VL-LABEL: splatvar_funnnel_v16i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VL-NEXT: vpslld %xmm3, %zmm0, %zmm3
; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VL-NEXT: vpsubb %xmm1, %xmm4, %xmm1
; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vpsrld %xmm1, %zmm0, %zmm0
; AVX512VL-NEXT: vpord %zmm0, %zmm3, %zmm0
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
@@ -1243,15 +1239,13 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind
;
; AVX512BW-LABEL: splatvar_funnnel_v16i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512BW-NEXT: vpsllw %xmm3, %ymm0, %ymm3
; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512BW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpor %ymm0, %ymm3, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
@@ -1261,15 +1255,13 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512VLBW-NEXT: vpsllw %xmm3, %ymm0, %ymm3
; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm0, %ymm3, %ymm0
; AVX512VLBW-NEXT: vpmovwb %ymm0, %xmm0
@@ -1278,15 +1270,13 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v16i8:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VBMI2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
; AVX512VBMI2-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512VBMI2-NEXT: vpsllw %xmm3, %ymm0, %ymm3
; AVX512VBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VBMI2-NEXT: vpsubb %xmm1, %xmm4, %xmm1
; AVX512VBMI2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VBMI2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512VBMI2-NEXT: vpor %ymm0, %ymm3, %ymm0
; AVX512VBMI2-NEXT: vpmovwb %zmm0, %ymm0
@@ -1296,15 +1286,13 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind
;
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v16i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VLVBMI2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
; AVX512VLVBMI2-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512VLVBMI2-NEXT: vpsllw %xmm3, %ymm0, %ymm3
; AVX512VLVBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLVBMI2-NEXT: vpsubb %xmm1, %xmm4, %xmm1
; AVX512VLVBMI2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLVBMI2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512VLVBMI2-NEXT: vpor %ymm0, %ymm3, %ymm0
; AVX512VLVBMI2-NEXT: vpmovwb %ymm0, %xmm0
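In the v16i8 rotate cases above the folded mask is materialized as vpbroadcastq of [7,7] (7 in the low byte of each 64-bit lane) instead of a full 16-byte [7,7,...,7] load: the AND result then already carries the zero-extended byte-in-qword form, so the vpmovzxbq vanishes, and the re-splatted upper qword is harmless because the shift only reads the low one. The 8-byte broadcast constant is also smaller than the old 16-byte pool entry.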
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
index 2b14847b9c14..37b15c61e2bb 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
@@ -978,15 +978,13 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
;
; AVX512BW-LABEL: splatvar_funnnel_v32i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BW-NEXT: vpsllw %xmm3, %zmm0, %zmm3
; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512BW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
@@ -994,15 +992,13 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512VLBW-NEXT: vpsllw %xmm3, %zmm0, %zmm3
; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512VLBW-NEXT: vpmovwb %zmm0, %ymm0
@@ -1010,15 +1006,13 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v32i8:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VBMI2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
; AVX512VBMI2-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512VBMI2-NEXT: vpsllw %xmm3, %zmm0, %zmm3
; AVX512VBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VBMI2-NEXT: vpsubb %xmm1, %xmm4, %xmm1
; AVX512VBMI2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VBMI2-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512VBMI2-NEXT: vpmovwb %zmm0, %ymm0
@@ -1026,15 +1020,13 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
;
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v32i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VLVBMI2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
; AVX512VLVBMI2-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512VLVBMI2-NEXT: vpsllw %xmm3, %zmm0, %zmm3
; AVX512VLVBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLVBMI2-NEXT: vpsubb %xmm1, %xmm4, %xmm1
; AVX512VLVBMI2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLVBMI2-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512VLVBMI2-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512VLVBMI2-NEXT: vpmovwb %zmm0, %ymm0
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
index 05039fd87010..4370d7bcc43f 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
@@ -525,20 +525,20 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw
define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v64i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastb %xmm1, %ymm3
-; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm4
+; AVX512F-NEXT: vpbroadcastb %xmm1, %ymm2
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
+; AVX512F-NEXT: vpsrlw $4, %ymm3, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512F-NEXT: vpand %ymm5, %ymm4, %ymm4
; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6
-; AVX512F-NEXT: vpsubb %ymm3, %ymm6, %ymm3
-; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
-; AVX512F-NEXT: vpsllw $5, %ymm3, %ymm3
-; AVX512F-NEXT: vpblendvb %ymm3, %ymm4, %ymm2, %ymm4
+; AVX512F-NEXT: vpsubb %ymm2, %ymm6, %ymm2
+; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
+; AVX512F-NEXT: vpsllw $5, %ymm2, %ymm2
+; AVX512F-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm4
; AVX512F-NEXT: vpsrlw $2, %ymm4, %ymm6
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm7 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512F-NEXT: vpand %ymm7, %ymm6, %ymm6
-; AVX512F-NEXT: vpaddb %ymm3, %ymm3, %ymm8
+; AVX512F-NEXT: vpaddb %ymm2, %ymm2, %ymm8
; AVX512F-NEXT: vpblendvb %ymm8, %ymm6, %ymm4, %ymm4
; AVX512F-NEXT: vpsrlw $1, %ymm4, %ymm6
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm9 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
@@ -547,42 +547,41 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
; AVX512F-NEXT: vpblendvb %ymm10, %ymm6, %ymm4, %ymm4
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm6
; AVX512F-NEXT: vpand %ymm5, %ymm6, %ymm5
-; AVX512F-NEXT: vpblendvb %ymm3, %ymm5, %ymm0, %ymm3
-; AVX512F-NEXT: vpsrlw $2, %ymm3, %ymm5
+; AVX512F-NEXT: vpblendvb %ymm2, %ymm5, %ymm0, %ymm2
+; AVX512F-NEXT: vpsrlw $2, %ymm2, %ymm5
; AVX512F-NEXT: vpand %ymm7, %ymm5, %ymm5
-; AVX512F-NEXT: vpblendvb %ymm8, %ymm5, %ymm3, %ymm3
-; AVX512F-NEXT: vpsrlw $1, %ymm3, %ymm5
+; AVX512F-NEXT: vpblendvb %ymm8, %ymm5, %ymm2, %ymm2
+; AVX512F-NEXT: vpsrlw $1, %ymm2, %ymm5
; AVX512F-NEXT: vpand %ymm5, %ymm9, %ymm5
-; AVX512F-NEXT: vpblendvb %ymm10, %ymm5, %ymm3, %ymm3
-; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3
+; AVX512F-NEXT: vpblendvb %ymm10, %ymm5, %ymm2, %ymm2
+; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512F-NEXT: vpsllw %xmm1, %ymm2, %ymm2
+; AVX512F-NEXT: vpsllw %xmm1, %ymm3, %ymm3
; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm2
+; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm3
; AVX512F-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
-; AVX512F-NEXT: vpternlogq $236, %zmm2, %zmm3, %zmm0
+; AVX512F-NEXT: vpternlogq $236, %zmm3, %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v64i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastb %xmm1, %ymm3
-; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm4
+; AVX512VL-NEXT: vpbroadcastb %xmm1, %ymm2
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
+; AVX512VL-NEXT: vpsrlw $4, %ymm3, %ymm4
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vpand %ymm5, %ymm4, %ymm4
; AVX512VL-NEXT: vpxor %xmm6, %xmm6, %xmm6
-; AVX512VL-NEXT: vpsubb %ymm3, %ymm6, %ymm3
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
-; AVX512VL-NEXT: vpsllw $5, %ymm3, %ymm3
-; AVX512VL-NEXT: vpblendvb %ymm3, %ymm4, %ymm2, %ymm4
+; AVX512VL-NEXT: vpsubb %ymm2, %ymm6, %ymm2
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
+; AVX512VL-NEXT: vpsllw $5, %ymm2, %ymm2
+; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm4
; AVX512VL-NEXT: vpsrlw $2, %ymm4, %ymm6
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm7 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512VL-NEXT: vpand %ymm7, %ymm6, %ymm6
-; AVX512VL-NEXT: vpaddb %ymm3, %ymm3, %ymm8
+; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm8
; AVX512VL-NEXT: vpblendvb %ymm8, %ymm6, %ymm4, %ymm4
; AVX512VL-NEXT: vpsrlw $1, %ymm4, %ymm6
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm9 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
@@ -591,31 +590,29 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
; AVX512VL-NEXT: vpblendvb %ymm10, %ymm6, %ymm4, %ymm4
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm6
; AVX512VL-NEXT: vpand %ymm5, %ymm6, %ymm5
-; AVX512VL-NEXT: vpblendvb %ymm3, %ymm5, %ymm0, %ymm3
-; AVX512VL-NEXT: vpsrlw $2, %ymm3, %ymm5
+; AVX512VL-NEXT: vpblendvb %ymm2, %ymm5, %ymm0, %ymm2
+; AVX512VL-NEXT: vpsrlw $2, %ymm2, %ymm5
; AVX512VL-NEXT: vpand %ymm7, %ymm5, %ymm5
-; AVX512VL-NEXT: vpblendvb %ymm8, %ymm5, %ymm3, %ymm3
-; AVX512VL-NEXT: vpsrlw $1, %ymm3, %ymm5
+; AVX512VL-NEXT: vpblendvb %ymm8, %ymm5, %ymm2, %ymm2
+; AVX512VL-NEXT: vpsrlw $1, %ymm2, %ymm5
; AVX512VL-NEXT: vpand %ymm5, %ymm9, %ymm5
-; AVX512VL-NEXT: vpblendvb %ymm10, %ymm5, %ymm3, %ymm3
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3
+; AVX512VL-NEXT: vpblendvb %ymm10, %ymm5, %ymm2, %ymm2
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512VL-NEXT: vpsllw %xmm1, %ymm2, %ymm2
+; AVX512VL-NEXT: vpsllw %xmm1, %ymm3, %ymm3
; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm2
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm3
; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
-; AVX512VL-NEXT: vpternlogq $236, %zmm2, %zmm3, %zmm0
+; AVX512VL-NEXT: vpternlogq $236, %zmm3, %zmm2, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v64i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm3, %zmm0, %zmm4
; AVX512BW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512BW-NEXT: vpsllw %xmm3, %xmm5, %xmm3
@@ -624,7 +621,6 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512BW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm2
; AVX512BW-NEXT: vpsrlw %xmm1, %xmm5, %xmm0
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm0
@@ -634,9 +630,8 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
;
; AVX512VLBW-LABEL: splatvar_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLBW-NEXT: vpsllw %xmm3, %zmm0, %zmm4
; AVX512VLBW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512VLBW-NEXT: vpsllw %xmm3, %xmm5, %xmm3
@@ -645,7 +640,6 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm2
; AVX512VLBW-NEXT: vpsrlw %xmm1, %xmm5, %xmm0
; AVX512VLBW-NEXT: vpsrlw $8, %xmm0, %xmm0
@@ -655,9 +649,8 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v64i8:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VBMI2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
; AVX512VBMI2-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VBMI2-NEXT: vpsllw %xmm3, %zmm0, %zmm4
; AVX512VBMI2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512VBMI2-NEXT: vpsllw %xmm3, %xmm5, %xmm3
@@ -666,7 +659,6 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
; AVX512VBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VBMI2-NEXT: vpsubb %xmm1, %xmm4, %xmm1
; AVX512VBMI2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VBMI2-NEXT: vpsrlw %xmm1, %zmm0, %zmm2
; AVX512VBMI2-NEXT: vpsrlw %xmm1, %xmm5, %xmm0
; AVX512VBMI2-NEXT: vpsrlw $8, %xmm0, %xmm0
@@ -676,9 +668,8 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
;
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v64i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VLVBMI2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
; AVX512VLVBMI2-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLVBMI2-NEXT: vpsllw %xmm3, %zmm0, %zmm4
; AVX512VLVBMI2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512VLVBMI2-NEXT: vpsllw %xmm3, %xmm5, %xmm3
@@ -687,7 +678,6 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
; AVX512VLVBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLVBMI2-NEXT: vpsubb %xmm1, %xmm4, %xmm1
; AVX512VLVBMI2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLVBMI2-NEXT: vpsrlw %xmm1, %zmm0, %zmm2
; AVX512VLVBMI2-NEXT: vpsrlw %xmm1, %xmm5, %xmm0
; AVX512VLVBMI2-NEXT: vpsrlw $8, %xmm0, %xmm0
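Most of the remaining churn in the fshl-rot-512 hunks above is register-allocation fallout rather than new code: with the vpmovzxbq gone, ymm2/ymm3 (and the zmm temporaries) are simply assigned the other way around, and the instruction sequence is otherwise unchanged.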
diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll
index 001c340bff68..d7408e91f6f6 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll
@@ -1196,52 +1196,33 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
;
; SSE41-LABEL: splatvar_funnnel_v4i32:
; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [31,31,31,31]
+; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [31,0,0,0]
; SSE41-NEXT: movdqa %xmm2, %xmm4
; SSE41-NEXT: pand %xmm3, %xmm4
-; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
; SSE41-NEXT: psrld %xmm4, %xmm1
; SSE41-NEXT: pandn %xmm3, %xmm2
-; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
; SSE41-NEXT: pslld $1, %xmm0
; SSE41-NEXT: pslld %xmm2, %xmm0
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX1-LABEL: splatvar_funnnel_v4i32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [31,31,31,31]
-; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
-; AVX1-NEXT: vpsrld %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpslld $1, %xmm0, %xmm0
-; AVX1-NEXT: vpslld %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: splatvar_funnnel_v4i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
-; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
-; AVX2-NEXT: vpsrld %xmm4, %xmm1, %xmm1
-; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX2-NEXT: vpslld $1, %xmm0, %xmm0
-; AVX2-NEXT: vpslld %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
+; AVX-LABEL: splatvar_funnnel_v4i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [31,0,0,0]
+; AVX-NEXT: vpand %xmm3, %xmm2, %xmm4
+; AVX-NEXT: vpsrld %xmm4, %xmm1, %xmm1
+; AVX-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vpslld $1, %xmm0, %xmm0
+; AVX-NEXT: vpslld %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v4i32:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [31,0,0,0]
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
; AVX512F-NEXT: vpsrld %xmm4, %xmm1, %xmm1
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
; AVX512F-NEXT: vpslld $1, %xmm0, %xmm0
; AVX512F-NEXT: vpslld %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -1249,12 +1230,10 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
;
; AVX512VL-LABEL: splatvar_funnnel_v4i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [31,0,0,0]
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
; AVX512VL-NEXT: vpsrld %xmm4, %xmm1, %xmm1
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
; AVX512VL-NEXT: vpslld $1, %xmm0, %xmm0
; AVX512VL-NEXT: vpslld %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -1262,12 +1241,10 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
;
; AVX512BW-LABEL: splatvar_funnnel_v4i32:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [31,0,0,0]
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512BW-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
; AVX512BW-NEXT: vpsrld %xmm4, %xmm1, %xmm1
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
; AVX512BW-NEXT: vpslld $1, %xmm0, %xmm0
; AVX512BW-NEXT: vpslld %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -1285,12 +1262,10 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v4i32:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [31,0,0,0]
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VLBW-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
; AVX512VLBW-NEXT: vpsrld %xmm4, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512VLBW-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
; AVX512VLBW-NEXT: vpslld $1, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpslld %xmm2, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -1303,31 +1278,16 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
; AVX512VLVBMI2-NEXT: vmovdqa %xmm1, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
-; XOPAVX1-LABEL: splatvar_funnnel_v4i32:
-; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [31,31,31,31]
-; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
-; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
-; XOPAVX1-NEXT: vpsrld %xmm4, %xmm1, %xmm1
-; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; XOPAVX1-NEXT: vpslld $1, %xmm0, %xmm0
-; XOPAVX1-NEXT: vpslld %xmm2, %xmm0, %xmm0
-; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
-; XOPAVX1-NEXT: retq
-;
-; XOPAVX2-LABEL: splatvar_funnnel_v4i32:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
-; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
-; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
-; XOPAVX2-NEXT: vpsrld %xmm4, %xmm1, %xmm1
-; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; XOPAVX2-NEXT: vpslld $1, %xmm0, %xmm0
-; XOPAVX2-NEXT: vpslld %xmm2, %xmm0, %xmm0
-; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
-; XOPAVX2-NEXT: retq
+; XOP-LABEL: splatvar_funnnel_v4i32:
+; XOP: # %bb.0:
+; XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [31,0,0,0]
+; XOP-NEXT: vpand %xmm3, %xmm2, %xmm4
+; XOP-NEXT: vpsrld %xmm4, %xmm1, %xmm1
+; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; XOP-NEXT: vpslld $1, %xmm0, %xmm0
+; XOP-NEXT: vpslld %xmm2, %xmm0, %xmm0
+; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
;
; X86-SSE2-LABEL: splatvar_funnnel_v4i32:
; X86-SSE2: # %bb.0:
@@ -1367,13 +1327,11 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; SSE41-LABEL: splatvar_funnnel_v8i16:
; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [15,0,0,0]
; SSE41-NEXT: movdqa %xmm2, %xmm4
; SSE41-NEXT: pand %xmm3, %xmm4
-; SSE41-NEXT: pmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; SSE41-NEXT: psrlw %xmm4, %xmm1
; SSE41-NEXT: pandn %xmm3, %xmm2
-; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; SSE41-NEXT: psllw $1, %xmm0
; SSE41-NEXT: psllw %xmm2, %xmm0
; SSE41-NEXT: por %xmm1, %xmm0
@@ -1381,12 +1339,10 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX-LABEL: splatvar_funnnel_v8i16:
; AVX: # %bb.0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX-NEXT: vpsllw $1, %xmm0, %xmm0
; AVX-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -1394,12 +1350,10 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512F-LABEL: splatvar_funnnel_v8i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512F-NEXT: vpsllw $1, %xmm0, %xmm0
; AVX512F-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -1407,12 +1361,10 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512VL-LABEL: splatvar_funnnel_v8i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VL-NEXT: vpsllw $1, %xmm0, %xmm0
; AVX512VL-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -1420,12 +1372,10 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512BW-LABEL: splatvar_funnnel_v8i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw $1, %xmm0, %xmm0
; AVX512BW-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -1443,12 +1393,10 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsllw $1, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -1463,12 +1411,10 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; XOP-LABEL: splatvar_funnnel_v8i16:
; XOP: # %bb.0:
-; XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; XOP-NEXT: vpand %xmm3, %xmm2, %xmm4
-; XOP-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; XOP-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; XOP-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; XOP-NEXT: vpsllw $1, %xmm0, %xmm0
; XOP-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -1526,43 +1472,41 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
;
; SSE41-LABEL: splatvar_funnnel_v16i8:
; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; SSE41-NEXT: movdqa %xmm2, %xmm4
-; SSE41-NEXT: pand %xmm3, %xmm4
-; SSE41-NEXT: pmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
-; SSE41-NEXT: psrlw %xmm4, %xmm1
-; SSE41-NEXT: pcmpeqd %xmm5, %xmm5
+; SSE41-NEXT: pxor %xmm3, %xmm3
+; SSE41-NEXT: pshufb %xmm3, %xmm2
+; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [7,0,0,0]
+; SSE41-NEXT: movdqa %xmm2, %xmm5
+; SSE41-NEXT: pand %xmm4, %xmm5
+; SSE41-NEXT: psrlw %xmm5, %xmm1
; SSE41-NEXT: pcmpeqd %xmm6, %xmm6
-; SSE41-NEXT: psrlw %xmm4, %xmm6
-; SSE41-NEXT: pshufb {{.*#+}} xmm6 = xmm6[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; SSE41-NEXT: pand %xmm1, %xmm6
-; SSE41-NEXT: pandn %xmm3, %xmm2
-; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: pcmpeqd %xmm7, %xmm7
+; SSE41-NEXT: psrlw %xmm5, %xmm7
+; SSE41-NEXT: pshufb {{.*#+}} xmm7 = xmm7[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; SSE41-NEXT: pand %xmm1, %xmm7
+; SSE41-NEXT: pandn %xmm4, %xmm2
; SSE41-NEXT: paddb %xmm0, %xmm0
-; SSE41-NEXT: psllw %xmm1, %xmm0
-; SSE41-NEXT: psllw %xmm1, %xmm5
-; SSE41-NEXT: pxor %xmm1, %xmm1
-; SSE41-NEXT: pshufb %xmm1, %xmm5
-; SSE41-NEXT: pand %xmm5, %xmm0
-; SSE41-NEXT: por %xmm6, %xmm0
+; SSE41-NEXT: psllw %xmm2, %xmm0
+; SSE41-NEXT: psllw %xmm2, %xmm6
+; SSE41-NEXT: pshufb %xmm3, %xmm6
+; SSE41-NEXT: pand %xmm6, %xmm0
+; SSE41-NEXT: por %xmm7, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: splatvar_funnnel_v16i8:
; AVX1: # %bb.0:
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
-; AVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
-; AVX1-NEXT: vpsrlw %xmm4, %xmm5, %xmm4
-; AVX1-NEXT: vpshufb {{.*#+}} xmm4 = xmm4[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [7,0,0,0]
+; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm5
+; AVX1-NEXT: vpsrlw %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpcmpeqd %xmm6, %xmm6, %xmm6
+; AVX1-NEXT: vpsrlw %xmm5, %xmm6, %xmm5
+; AVX1-NEXT: vpshufb {{.*#+}} xmm5 = xmm5[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX1-NEXT: vpand %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpandn %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpaddb %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpsllw %xmm2, %xmm5, %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpsllw %xmm2, %xmm6, %xmm2
; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -1570,9 +1514,9 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
;
; AVX2-LABEL: splatvar_funnnel_v16i8:
; AVX2: # %bb.0:
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX2-NEXT: vpbroadcastb %xmm2, %xmm2
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,0,0,0]
; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpaddb %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vpsllw %xmm4, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
@@ -1580,7 +1524,6 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
; AVX2-NEXT: vpbroadcastb %xmm4, %xmm4
; AVX2-NEXT: vpand %xmm4, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpsrlw %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpsrlw %xmm2, %xmm5, %xmm2
; AVX2-NEXT: vpsrlw $8, %xmm2, %xmm2
@@ -1591,13 +1534,11 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
;
; AVX512F-LABEL: splatvar_funnnel_v16i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [7,0,0,0]
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; AVX512F-NEXT: vpsrld %xmm4, %zmm1, %zmm1
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpaddb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512F-NEXT: vpslld %xmm2, %zmm0, %zmm0
@@ -1608,13 +1549,11 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
;
; AVX512VL-LABEL: splatvar_funnnel_v16i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [7,0,0,0]
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; AVX512VL-NEXT: vpsrld %xmm4, %zmm1, %zmm1
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vpaddb %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VL-NEXT: vpslld %xmm2, %zmm0, %zmm0
@@ -1625,13 +1564,11 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
;
; AVX512BW-LABEL: splatvar_funnnel_v16i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,0,0,0]
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX512BW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpaddb %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm0
@@ -1643,13 +1580,11 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v16i8:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,0,0,0]
; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX512VBMI2-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512VBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VBMI2-NEXT: vpaddb %xmm0, %xmm0, %xmm0
; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512VBMI2-NEXT: vpsllw %xmm2, %ymm0, %ymm0
@@ -1661,13 +1596,11 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,0,0,0]
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX512VLBW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLBW-NEXT: vpaddb %xmm0, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm0
@@ -1678,13 +1611,11 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
;
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v16i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,0,0,0]
; AVX512VLVBMI2-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX512VLVBMI2-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512VLVBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLVBMI2-NEXT: vpaddb %xmm0, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512VLVBMI2-NEXT: vpsllw %xmm2, %ymm0, %ymm0
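Note that in the fshr-128 checks the AVX1/AVX2 and XOPAVX1/XOPAVX2 outputs converge once the vpmovzxdq and the broadcast-vs-load difference in the mask constant disappear, so the blocks collapse into the shared AVX and XOP prefixes (presumably the common-prefix behaviour of the usual update_llc_test_checks.py regeneration).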
diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll
index fe89e86d5b11..02408dd8a05d 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll
@@ -1241,74 +1241,66 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
;
; AVX2-LABEL: splatvar_funnnel_v32i8:
; AVX2: # %bb.0:
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; AVX2-NEXT: vpsrlw %xmm3, %ymm1, %ymm1
+; AVX2-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX2-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
+; AVX2-NEXT: vpsrlw $8, %xmm3, %xmm3
+; AVX2-NEXT: vpbroadcastb %xmm3, %ymm3
+; AVX2-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm0
-; AVX2-NEXT: vpsllw %xmm4, %ymm0, %ymm0
-; AVX2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
-; AVX2-NEXT: vpsllw %xmm4, %xmm5, %xmm4
-; AVX2-NEXT: vpbroadcastb %xmm4, %ymm4
-; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0
-; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
-; AVX2-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
-; AVX2-NEXT: vpsrlw %xmm2, %xmm5, %xmm2
-; AVX2-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsllw %xmm2, %xmm4, %xmm2
; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2
-; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v32i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; AVX512F-NEXT: vpsrlw %xmm3, %ymm1, %ymm1
+; AVX512F-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX512F-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
+; AVX512F-NEXT: vpsrlw $8, %xmm3, %xmm3
+; AVX512F-NEXT: vpbroadcastb %xmm3, %ymm3
+; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm0
-; AVX512F-NEXT: vpsllw %xmm4, %ymm0, %ymm0
-; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
-; AVX512F-NEXT: vpsllw %xmm4, %xmm5, %xmm4
-; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4
-; AVX512F-NEXT: vpand %ymm4, %ymm0, %ymm0
-; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX512F-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512F-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
-; AVX512F-NEXT: vpsrlw %xmm2, %xmm5, %xmm2
-; AVX512F-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm0
+; AVX512F-NEXT: vpsllw %xmm2, %xmm4, %xmm2
; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2
-; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v32i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm0
-; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
-; AVX512VL-NEXT: vpsllw %xmm4, %xmm5, %xmm4
-; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4
-; AVX512VL-NEXT: vpand %ymm4, %ymm0, %ymm4
-; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm0
-; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX512VL-NEXT: vpsllw %xmm3, %xmm4, %xmm3
+; AVX512VL-NEXT: vpbroadcastb %xmm3, %ymm3
+; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm3
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
; AVX512VL-NEXT: vpsrlw %xmm0, %ymm1, %ymm1
-; AVX512VL-NEXT: vpsrlw %xmm0, %xmm5, %xmm0
+; AVX512VL-NEXT: vpsrlw %xmm0, %xmm4, %xmm0
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512VL-NEXT: vpternlogq $236, %ymm1, %ymm4, %ymm0
+; AVX512VL-NEXT: vpternlogq $236, %ymm1, %ymm3, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v32i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
-; AVX512BW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
-; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; AVX512BW-NEXT: vpsrlw %xmm3, %zmm1, %zmm1
+; AVX512BW-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpaddb %ymm0, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
@@ -1319,12 +1311,10 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v32i8:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
-; AVX512VBMI2-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; AVX512VBMI2-NEXT: vpsrlw %xmm3, %zmm1, %zmm1
+; AVX512VBMI2-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VBMI2-NEXT: vpaddb %ymm0, %ymm0, %ymm0
; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
@@ -1335,12 +1325,10 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
-; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
-; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm1, %zmm1
+; AVX512VLBW-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLBW-NEXT: vpaddb %ymm0, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
@@ -1351,12 +1339,10 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
;
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v32i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX512VLVBMI2-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
-; AVX512VLVBMI2-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
-; AVX512VLVBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; AVX512VLVBMI2-NEXT: vpsrlw %xmm3, %zmm1, %zmm1
+; AVX512VLVBMI2-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLVBMI2-NEXT: vpaddb %ymm0, %ymm0, %ymm0
; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
diff --git a/llvm/test/CodeGen/X86/vector-fshr-512.ll b/llvm/test/CodeGen/X86/vector-fshr-512.ll
index 09f45e811102..9239134fdc09 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-512.ll
@@ -749,140 +749,128 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v64i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm5
-; AVX512F-NEXT: vpaddb %ymm5, %ymm5, %ymm5
-; AVX512F-NEXT: vpsllw %xmm4, %ymm5, %ymm5
+; AVX512F-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm4
+; AVX512F-NEXT: vpaddb %ymm4, %ymm4, %ymm4
+; AVX512F-NEXT: vpsllw %xmm3, %ymm4, %ymm4
; AVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm0
-; AVX512F-NEXT: vpsllw %xmm4, %ymm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm0
-; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
-; AVX512F-NEXT: vpsllw %xmm4, %xmm5, %xmm4
-; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4
-; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4
-; AVX512F-NEXT: vpandq %zmm4, %zmm0, %zmm4
-; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm0
-; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
-; AVX512F-NEXT: vpsrlw %xmm0, %ymm2, %ymm2
-; AVX512F-NEXT: vpsrlw %xmm0, %ymm1, %ymm1
-; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlw %xmm0, %xmm5, %xmm0
+; AVX512F-NEXT: vpsllw %xmm3, %ymm0, %ymm0
+; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0
+; AVX512F-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX512F-NEXT: vpsllw %xmm3, %xmm4, %xmm3
+; AVX512F-NEXT: vpbroadcastb %xmm3, %ymm3
+; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm3, %zmm3
+; AVX512F-NEXT: vpandq %zmm3, %zmm0, %zmm3
+; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm0
+; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX512F-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
+; AVX512F-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
+; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm1
+; AVX512F-NEXT: vpsrlw %xmm2, %xmm4, %xmm0
; AVX512F-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
-; AVX512F-NEXT: vpternlogq $236, %zmm1, %zmm4, %zmm0
+; AVX512F-NEXT: vpternlogq $236, %zmm1, %zmm3, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v64i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm5
-; AVX512VL-NEXT: vpaddb %ymm5, %ymm5, %ymm5
-; AVX512VL-NEXT: vpsllw %xmm4, %ymm5, %ymm5
+; AVX512VL-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm4
+; AVX512VL-NEXT: vpaddb %ymm4, %ymm4, %ymm4
+; AVX512VL-NEXT: vpsllw %xmm3, %ymm4, %ymm4
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm0
-; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm0
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm0
-; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
-; AVX512VL-NEXT: vpsllw %xmm4, %xmm5, %xmm4
-; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4
-; AVX512VL-NEXT: vpandq %zmm4, %zmm0, %zmm4
-; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm0
-; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
-; AVX512VL-NEXT: vpsrlw %xmm0, %ymm2, %ymm2
-; AVX512VL-NEXT: vpsrlw %xmm0, %ymm1, %ymm1
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
-; AVX512VL-NEXT: vpsrlw %xmm0, %xmm5, %xmm0
+; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0
+; AVX512VL-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX512VL-NEXT: vpsllw %xmm3, %xmm4, %xmm3
+; AVX512VL-NEXT: vpbroadcastb %xmm3, %ymm3
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm3, %zmm3
+; AVX512VL-NEXT: vpandq %zmm3, %zmm0, %zmm3
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm0
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX512VL-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm1
+; AVX512VL-NEXT: vpsrlw %xmm2, %xmm4, %xmm0
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
-; AVX512VL-NEXT: vpternlogq $236, %zmm1, %zmm4, %zmm0
+; AVX512VL-NEXT: vpternlogq $236, %zmm1, %zmm3, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v64i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpaddb %zmm0, %zmm0, %zmm0
-; AVX512BW-NEXT: vpsllw %xmm4, %zmm0, %zmm0
-; AVX512BW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
-; AVX512BW-NEXT: vpsllw %xmm4, %xmm5, %xmm4
-; AVX512BW-NEXT: vpbroadcastb %xmm4, %zmm4
-; AVX512BW-NEXT: vpandq %zmm4, %zmm0, %zmm4
-; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm0
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpsllw %xmm3, %zmm0, %zmm0
+; AVX512BW-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX512BW-NEXT: vpsllw %xmm3, %xmm4, %xmm3
+; AVX512BW-NEXT: vpbroadcastb %xmm3, %zmm3
+; AVX512BW-NEXT: vpandq %zmm3, %zmm0, %zmm3
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
; AVX512BW-NEXT: vpsrlw %xmm0, %zmm1, %zmm1
-; AVX512BW-NEXT: vpsrlw %xmm0, %xmm5, %xmm0
+; AVX512BW-NEXT: vpsrlw %xmm0, %xmm4, %xmm0
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX512BW-NEXT: vpbroadcastb %xmm0, %zmm0
-; AVX512BW-NEXT: vpternlogq $236, %zmm1, %zmm4, %zmm0
+; AVX512BW-NEXT: vpternlogq $236, %zmm1, %zmm3, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v64i8:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX512VBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VBMI2-NEXT: vpaddb %zmm0, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpsllw %xmm4, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
-; AVX512VBMI2-NEXT: vpsllw %xmm4, %xmm5, %xmm4
-; AVX512VBMI2-NEXT: vpbroadcastb %xmm4, %zmm4
-; AVX512VBMI2-NEXT: vpandq %zmm4, %zmm0, %zmm4
-; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm0
-; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512VBMI2-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512VBMI2-NEXT: vpsllw %xmm3, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX512VBMI2-NEXT: vpsllw %xmm3, %xmm4, %xmm3
+; AVX512VBMI2-NEXT: vpbroadcastb %xmm3, %zmm3
+; AVX512VBMI2-NEXT: vpandq %zmm3, %zmm0, %zmm3
+; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
; AVX512VBMI2-NEXT: vpsrlw %xmm0, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vpsrlw %xmm0, %xmm5, %xmm0
+; AVX512VBMI2-NEXT: vpsrlw %xmm0, %xmm4, %xmm0
; AVX512VBMI2-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX512VBMI2-NEXT: vpbroadcastb %xmm0, %zmm0
-; AVX512VBMI2-NEXT: vpternlogq $236, %zmm1, %zmm4, %zmm0
+; AVX512VBMI2-NEXT: vpternlogq $236, %zmm1, %zmm3, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLBW-NEXT: vpaddb %zmm0, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpsllw %xmm4, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
-; AVX512VLBW-NEXT: vpsllw %xmm4, %xmm5, %xmm4
-; AVX512VLBW-NEXT: vpbroadcastb %xmm4, %zmm4
-; AVX512VLBW-NEXT: vpandq %zmm4, %zmm0, %zmm4
-; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm0
-; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512VLBW-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512VLBW-NEXT: vpsllw %xmm3, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX512VLBW-NEXT: vpsllw %xmm3, %xmm4, %xmm3
+; AVX512VLBW-NEXT: vpbroadcastb %xmm3, %zmm3
+; AVX512VLBW-NEXT: vpandq %zmm3, %zmm0, %zmm3
+; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
; AVX512VLBW-NEXT: vpsrlw %xmm0, %zmm1, %zmm1
-; AVX512VLBW-NEXT: vpsrlw %xmm0, %xmm5, %xmm0
+; AVX512VLBW-NEXT: vpsrlw %xmm0, %xmm4, %xmm0
; AVX512VLBW-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpbroadcastb %xmm0, %zmm0
-; AVX512VLBW-NEXT: vpternlogq $236, %zmm1, %zmm4, %zmm0
+; AVX512VLBW-NEXT: vpternlogq $236, %zmm1, %zmm3, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v64i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX512VLVBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLVBMI2-NEXT: vpaddb %zmm0, %zmm0, %zmm0
-; AVX512VLVBMI2-NEXT: vpsllw %xmm4, %zmm0, %zmm0
-; AVX512VLVBMI2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
-; AVX512VLVBMI2-NEXT: vpsllw %xmm4, %xmm5, %xmm4
-; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm4, %zmm4
-; AVX512VLVBMI2-NEXT: vpandq %zmm4, %zmm0, %zmm4
-; AVX512VLVBMI2-NEXT: vpand %xmm3, %xmm2, %xmm0
-; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512VLVBMI2-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
+; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512VLVBMI2-NEXT: vpsllw %xmm3, %zmm0, %zmm0
+; AVX512VLVBMI2-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX512VLVBMI2-NEXT: vpsllw %xmm3, %xmm4, %xmm3
+; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm3, %zmm3
+; AVX512VLVBMI2-NEXT: vpandq %zmm3, %zmm0, %zmm3
+; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
; AVX512VLVBMI2-NEXT: vpsrlw %xmm0, %zmm1, %zmm1
-; AVX512VLVBMI2-NEXT: vpsrlw %xmm0, %xmm5, %xmm0
+; AVX512VLVBMI2-NEXT: vpsrlw %xmm0, %xmm4, %xmm0
; AVX512VLVBMI2-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm0, %zmm0
-; AVX512VLVBMI2-NEXT: vpternlogq $236, %zmm1, %zmm4, %zmm0
+; AVX512VLVBMI2-NEXT: vpternlogq $236, %zmm1, %zmm3, %zmm0
; AVX512VLVBMI2-NEXT: retq
%splat = shufflevector <64 x i8> %amt, <64 x i8> undef, <64 x i32> zeroinitializer
%res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %splat)
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
index 2ac53fe78913..713bb9ebb4c2 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
@@ -1292,15 +1292,13 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind
;
; AVX512F-LABEL: splatvar_funnnel_v16i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512F-NEXT: vpsrld %xmm3, %zmm0, %zmm3
; AVX512F-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512F-NEXT: vpsubb %xmm1, %xmm4, %xmm1
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpslld %xmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpord %zmm0, %zmm3, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
@@ -1309,15 +1307,13 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind
;
; AVX512VL-LABEL: splatvar_funnnel_v16i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VL-NEXT: vpsrld %xmm3, %zmm0, %zmm3
; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VL-NEXT: vpsubb %xmm1, %xmm4, %xmm1
; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vpslld %xmm1, %zmm0, %zmm0
; AVX512VL-NEXT: vpord %zmm0, %zmm3, %zmm0
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
@@ -1326,15 +1322,13 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind
;
; AVX512BW-LABEL: splatvar_funnnel_v16i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512BW-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512BW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpor %ymm0, %ymm3, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
@@ -1344,15 +1338,13 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512VLBW-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm0, %ymm3, %ymm0
; AVX512VLBW-NEXT: vpmovwb %ymm0, %xmm0
@@ -1361,15 +1353,13 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v16i8:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VBMI2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
; AVX512VBMI2-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512VBMI2-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
; AVX512VBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VBMI2-NEXT: vpsubb %xmm1, %xmm4, %xmm1
; AVX512VBMI2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VBMI2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512VBMI2-NEXT: vpor %ymm0, %ymm3, %ymm0
; AVX512VBMI2-NEXT: vpmovwb %zmm0, %ymm0
@@ -1379,15 +1369,13 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind
;
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v16i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VLVBMI2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
; AVX512VLVBMI2-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512VLVBMI2-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
; AVX512VLVBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLVBMI2-NEXT: vpsubb %xmm1, %xmm4, %xmm1
; AVX512VLVBMI2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLVBMI2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512VLVBMI2-NEXT: vpor %ymm0, %ymm3, %ymm0
; AVX512VLVBMI2-NEXT: vpmovwb %ymm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
index ac375e7521d4..9484b9e7a61f 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
@@ -1072,15 +1072,13 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
;
; AVX512BW-LABEL: splatvar_funnnel_v32i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3
; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512BW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
@@ -1088,15 +1086,13 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3
; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512VLBW-NEXT: vpmovwb %zmm0, %ymm0
@@ -1104,15 +1100,13 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v32i8:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VBMI2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
; AVX512VBMI2-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512VBMI2-NEXT: vpsrlw %xmm3, %zmm0, %zmm3
; AVX512VBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VBMI2-NEXT: vpsubb %xmm1, %xmm4, %xmm1
; AVX512VBMI2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VBMI2-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512VBMI2-NEXT: vpmovwb %zmm0, %ymm0
@@ -1120,15 +1114,13 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
;
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v32i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VLVBMI2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
; AVX512VLVBMI2-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512VLVBMI2-NEXT: vpsrlw %xmm3, %zmm0, %zmm3
; AVX512VLVBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLVBMI2-NEXT: vpsubb %xmm1, %xmm4, %xmm1
; AVX512VLVBMI2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLVBMI2-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512VLVBMI2-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512VLVBMI2-NEXT: vpmovwb %zmm0, %ymm0
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
index 1e1f810fc525..65682cd5cb4d 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
@@ -581,7 +581,6 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
; AVX512F-NEXT: vpblendvb %ymm9, %ymm5, %ymm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm1, %ymm3, %ymm3
; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm3
@@ -623,7 +622,6 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
; AVX512VL-NEXT: vpblendvb %ymm9, %ymm5, %ymm2, %ymm2
; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm3, %ymm3
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm3
@@ -637,9 +635,8 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
;
; AVX512BW-LABEL: splatvar_funnnel_v64i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm3, %zmm0, %zmm4
; AVX512BW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512BW-NEXT: vpsrlw %xmm3, %xmm5, %xmm3
@@ -649,7 +646,6 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512BW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm2
; AVX512BW-NEXT: vpsllw %xmm1, %xmm5, %xmm0
; AVX512BW-NEXT: vpbroadcastb %xmm0, %zmm0
@@ -658,9 +654,8 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
;
; AVX512VLBW-LABEL: splatvar_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm0, %zmm4
; AVX512VLBW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512VLBW-NEXT: vpsrlw %xmm3, %xmm5, %xmm3
@@ -670,7 +665,6 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm2
; AVX512VLBW-NEXT: vpsllw %xmm1, %xmm5, %xmm0
; AVX512VLBW-NEXT: vpbroadcastb %xmm0, %zmm0
@@ -679,9 +673,8 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v64i8:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VBMI2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
; AVX512VBMI2-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VBMI2-NEXT: vpsrlw %xmm3, %zmm0, %zmm4
; AVX512VBMI2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512VBMI2-NEXT: vpsrlw %xmm3, %xmm5, %xmm3
@@ -691,7 +684,6 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
; AVX512VBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VBMI2-NEXT: vpsubb %xmm1, %xmm4, %xmm1
; AVX512VBMI2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VBMI2-NEXT: vpsllw %xmm1, %zmm0, %zmm2
; AVX512VBMI2-NEXT: vpsllw %xmm1, %xmm5, %xmm0
; AVX512VBMI2-NEXT: vpbroadcastb %xmm0, %zmm0
@@ -700,9 +692,8 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
;
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v64i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VLVBMI2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
; AVX512VLVBMI2-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLVBMI2-NEXT: vpsrlw %xmm3, %zmm0, %zmm4
; AVX512VLVBMI2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512VLVBMI2-NEXT: vpsrlw %xmm3, %xmm5, %xmm3
@@ -712,7 +703,6 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
; AVX512VLVBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLVBMI2-NEXT: vpsubb %xmm1, %xmm4, %xmm1
; AVX512VLVBMI2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VLVBMI2-NEXT: vpsllw %xmm1, %zmm0, %zmm2
; AVX512VLVBMI2-NEXT: vpsllw %xmm1, %xmm5, %xmm0
; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm0, %zmm0
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
index b151a7d56748..0244beb6745c 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
@@ -1005,52 +1005,30 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
; SSE41-LABEL: splatvar_modulo_shift_v4i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; SSE41-NEXT: psrad %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX1-LABEL: splatvar_modulo_shift_v4i32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: splatvar_modulo_shift_v4i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX2-NEXT: vpsrad %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; XOPAVX1-LABEL: splatvar_modulo_shift_v4i32:
-; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; XOPAVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
-; XOPAVX1-NEXT: retq
+; AVX-LABEL: splatvar_modulo_shift_v4i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
;
-; XOPAVX2-LABEL: splatvar_modulo_shift_v4i32:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; XOPAVX2-NEXT: vpsrad %xmm1, %xmm0, %xmm0
-; XOPAVX2-NEXT: retq
+; XOP-LABEL: splatvar_modulo_shift_v4i32:
+; XOP: # %bb.0:
+; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOP-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v4i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v4i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
-; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
@@ -1079,35 +1057,30 @@ define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwi
; SSE41-LABEL: splatvar_modulo_shift_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; SSE41-NEXT: psraw %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: splatvar_modulo_shift_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; XOP-LABEL: splatvar_modulo_shift_v8i16:
; XOP: # %bb.0:
; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOP-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOP-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v8i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v8i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
@@ -1147,7 +1120,6 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
; SSE41-LABEL: splatvar_modulo_shift_v16i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: psrlw %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
; SSE41-NEXT: psrlw %xmm1, %xmm2
@@ -1162,7 +1134,6 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
; AVX1-LABEL: splatvar_modulo_shift_v16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
@@ -1177,7 +1148,6 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
; AVX2-LABEL: splatvar_modulo_shift_v16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
index 76b32990a529..792cf6a26168 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
@@ -824,52 +824,30 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
; SSE41-LABEL: splatvar_modulo_shift_v4i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; SSE41-NEXT: psrld %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX1-LABEL: splatvar_modulo_shift_v4i32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: splatvar_modulo_shift_v4i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX2-NEXT: vpsrld %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; XOPAVX1-LABEL: splatvar_modulo_shift_v4i32:
-; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; XOPAVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
-; XOPAVX1-NEXT: retq
+; AVX-LABEL: splatvar_modulo_shift_v4i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
;
-; XOPAVX2-LABEL: splatvar_modulo_shift_v4i32:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; XOPAVX2-NEXT: vpsrld %xmm1, %xmm0, %xmm0
-; XOPAVX2-NEXT: retq
+; XOP-LABEL: splatvar_modulo_shift_v4i32:
+; XOP: # %bb.0:
+; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOP-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v4i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v4i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
-; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
@@ -898,35 +876,30 @@ define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwi
; SSE41-LABEL: splatvar_modulo_shift_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; SSE41-NEXT: psrlw %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: splatvar_modulo_shift_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; XOP-LABEL: splatvar_modulo_shift_v8i16:
; XOP: # %bb.0:
; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOP-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOP-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v8i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v8i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
@@ -962,7 +935,6 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
; SSE41-LABEL: splatvar_modulo_shift_v16i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: psrlw %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
; SSE41-NEXT: psrlw %xmm1, %xmm2
@@ -973,7 +945,6 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
; AVX1-LABEL: splatvar_modulo_shift_v16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
@@ -984,7 +955,6 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
; AVX2-LABEL: splatvar_modulo_shift_v16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
index ee2a38089ed4..b84752c02584 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
@@ -238,7 +238,6 @@ define <64 x i8> @splatvar_modulo_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwi
; AVX512DQ-LABEL: splatvar_modulo_shift_v64i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
index c10c22b472d1..24cfc6fe385f 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
@@ -729,52 +729,30 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
; SSE41-LABEL: splatvar_modulo_shift_v4i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; SSE41-NEXT: pslld %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX1-LABEL: splatvar_modulo_shift_v4i32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: splatvar_modulo_shift_v4i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX2-NEXT: vpslld %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; XOPAVX1-LABEL: splatvar_modulo_shift_v4i32:
-; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; XOPAVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
-; XOPAVX1-NEXT: retq
+; AVX-LABEL: splatvar_modulo_shift_v4i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
;
-; XOPAVX2-LABEL: splatvar_modulo_shift_v4i32:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; XOPAVX2-NEXT: vpslld %xmm1, %xmm0, %xmm0
-; XOPAVX2-NEXT: retq
+; XOP-LABEL: splatvar_modulo_shift_v4i32:
+; XOP: # %bb.0:
+; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOP-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v4i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vpslld %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v4i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
-; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpslld %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
@@ -803,35 +781,30 @@ define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwi
; SSE41-LABEL: splatvar_modulo_shift_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; SSE41-NEXT: psllw %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: splatvar_modulo_shift_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; XOP-LABEL: splatvar_modulo_shift_v8i16:
; XOP: # %bb.0:
; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOP-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOP-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v8i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v8i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
@@ -866,7 +839,6 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
; SSE41-LABEL: splatvar_modulo_shift_v16i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: psllw %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
; SSE41-NEXT: psllw %xmm1, %xmm2
@@ -878,7 +850,6 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
; AVX1-LABEL: splatvar_modulo_shift_v16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm1
@@ -890,7 +861,6 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
; AVX2-LABEL: splatvar_modulo_shift_v16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpsllw %xmm1, %xmm2, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-512.ll b/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
index 655bd6319a6a..d2998938e280 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
@@ -231,7 +231,6 @@ define <64 x i8> @splatvar_modulo_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwi
; AVX512DQ-LABEL: splatvar_modulo_shift_v64i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0
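
Across all of these test deltas the pattern is the same: a splatted shift or rotate amount is clamped with an AND (mask 7 for i8 shifts, 15 for i16, 31 for i32) and then widened with a vpmovzx so it can feed a uniform shift such as vpsrlw/vpslld, which reads only the bottom scalar of its amount operand. Because only element 0 of the zero-extended vector is demanded, the new fold merges the extension into the AND's constant, so the vpmovzx instructions drop out and the mask constants fold into memory operands or narrower constants such as [31,0,0,0]. A minimal IR sketch of an affected pattern follows — a hypothetical reduced example, not taken from the modified test files:

define <4 x i32> @splatvar_masked_lshr(<4 x i32> %a, <4 x i32> %amt) {
  ; Splat lane 0 of the amount, then clamp every lane to [0,31].
  %splat = shufflevector <4 x i32> %amt, <4 x i32> undef, <4 x i32> zeroinitializer
  %masked = and <4 x i32> %splat, <i32 31, i32 31, i32 31, i32 31>
  ; On x86 this lowers to a uniform shift (vpsrld) that only reads the low
  ; element of its amount vector, so the zero_extend_vector_inreg the DAG
  ; builds around %masked is a candidate for the new fold.
  %res = lshr <4 x i32> %a, %masked
  ret <4 x i32> %res
}

Before this change the lowering emitted vpand + vpmovzxdq + vpsrld for the amount; afterwards the vpmovzxdq disappears, since an AND with the folded constant [31,0,0,0] already zeroes every lane the extension would have cleared.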
More information about the llvm-commits mailing list