[llvm] 147cfcb - [X86] LowerShiftByScalarVariable - find splat patterns with getSplatSourceVector instead of getSplatValue
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 4 08:55:44 PST 2022
Author: Simon Pilgrim
Date: 2022-03-04T16:47:35Z
New Revision: 147cfcbef1255ba2b4875b76708dab1a685085f5
URL: https://github.com/llvm/llvm-project/commit/147cfcbef1255ba2b4875b76708dab1a685085f5
DIFF: https://github.com/llvm/llvm-project/commit/147cfcbef1255ba2b4875b76708dab1a685085f5.diff
LOG: [X86] LowerShiftByScalarVariable - find splat patterns with getSplatSourceVector instead of getSplatValue
This completes the removal of uses of SelectionDAG::getSplatValue started in D119090 - by avoiding extracting the splatted element we make it a lot easier to zero-extend the bottom 64-bits of the shift amount and fixes issues we had on 32-bit targets where i64 isn't legal.
I've removed the old version of getTargetVShiftNode that took the scalar shift amount argument and LowerRotate can finally efficiently handle vXi16 rotates-by-scalar (using the same code as general funnel-shifts).
The only regression we see is in the X86-AVX2 PR52719 test case in vector-shift-ashr-256.ll - this is now hitting the same problem as the X86-AVX1 case (failure to simplify a multi-use X86ISD::VBROADCAST_LOAD) which I intend to address in a follow up patch.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/pr15296.ll
llvm/test/CodeGen/X86/vector-fshl-128.ll
llvm/test/CodeGen/X86/vector-fshl-256.ll
llvm/test/CodeGen/X86/vector-fshl-512.ll
llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
llvm/test/CodeGen/X86/vector-fshr-128.ll
llvm/test/CodeGen/X86/vector-fshr-256.ll
llvm/test/CodeGen/X86/vector-fshr-512.ll
llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
llvm/test/CodeGen/X86/vector-rotate-128.ll
llvm/test/CodeGen/X86/vector-rotate-256.ll
llvm/test/CodeGen/X86/vector-rotate-512.ll
llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
llvm/test/CodeGen/X86/vector-shift-shl-128.ll
llvm/test/CodeGen/X86/vector-shift-shl-256.ll
llvm/test/CodeGen/X86/vector-shift-shl-512.ll
llvm/test/CodeGen/X86/vselect-avx.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7cdb4f754ba82..12b9195e11e20 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -25810,72 +25810,6 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
}
-/// Handle vector element shifts where the shift amount may or may not be a
-/// constant. Takes immediate version of shift as input.
-/// TODO: Replace with vector + (splat) idx to avoid extract_element nodes.
-static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
- SDValue SrcOp, SDValue ShAmt,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
- MVT SVT = ShAmt.getSimpleValueType();
- assert((SVT == MVT::i32 || SVT == MVT::i64) && "Unexpected value type!");
-
- // Change opcode to non-immediate version.
- Opc = getTargetVShiftUniformOpcode(Opc, true);
-
- // Need to build a vector containing shift amount.
- // SSE/AVX packed shifts only use the lower 64-bit of the shift count.
- // +====================+============+=======================================+
- // | ShAmt is | HasSSE4.1? | Construct ShAmt vector as |
- // +====================+============+=======================================+
- // | i64 | Yes, No | Use ShAmt as lowest elt |
- // | i32 | Yes | zero-extend in-reg |
- // | (i32 zext(i16/i8)) | Yes | zero-extend in-reg |
- // | (i32 zext(i16/i8)) | No | byte-shift-in-reg |
- // | i16/i32 | No | v4i32 build_vector(ShAmt, 0, ud, ud)) |
- // +====================+============+=======================================+
-
- if (SVT == MVT::i64)
- ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v2i64, ShAmt);
- else if (ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
- ShAmt.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- (ShAmt.getOperand(0).getSimpleValueType() == MVT::i16 ||
- ShAmt.getOperand(0).getSimpleValueType() == MVT::i8)) {
- ShAmt = ShAmt.getOperand(0);
- MVT AmtTy = ShAmt.getSimpleValueType() == MVT::i8 ? MVT::v16i8 : MVT::v8i16;
- ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), AmtTy, ShAmt);
- if (Subtarget.hasSSE41())
- ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt),
- MVT::v2i64, ShAmt);
- else {
- SDValue ByteShift = DAG.getTargetConstant(
- (128 - AmtTy.getScalarSizeInBits()) / 8, SDLoc(ShAmt), MVT::i8);
- ShAmt = DAG.getBitcast(MVT::v16i8, ShAmt);
- ShAmt = DAG.getNode(X86ISD::VSHLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt,
- ByteShift);
- ShAmt = DAG.getNode(X86ISD::VSRLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt,
- ByteShift);
- }
- } else if (Subtarget.hasSSE41() &&
- ShAmt.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
- ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v4i32, ShAmt);
- ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt),
- MVT::v2i64, ShAmt);
- } else {
- SDValue ShOps[4] = {ShAmt, DAG.getConstant(0, dl, SVT), DAG.getUNDEF(SVT),
- DAG.getUNDEF(SVT)};
- ShAmt = DAG.getBuildVector(MVT::v4i32, dl, ShOps);
- }
-
- // The return type has to be a 128-bit type with the same element
- // type as the input type.
- MVT EltVT = VT.getVectorElementType();
- MVT ShVT = MVT::getVectorVT(EltVT, 128 / EltVT.getSizeInBits());
-
- ShAmt = DAG.getBitcast(ShVT, ShAmt);
- return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
-}
-
/// Return Mask with the necessary casting or extending
/// for \p Mask according to \p MaskVT when lowering masking intrinsics
static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
@@ -29341,22 +29275,12 @@ static SDValue LowerShiftByScalarVariable(SDValue Op, SelectionDAG &DAG,
unsigned Opcode = Op.getOpcode();
unsigned X86OpcI = getTargetVShiftUniformOpcode(Opcode, false);
- // TODO: Use getSplatSourceVector.
- if (SDValue BaseShAmt = DAG.getSplatValue(Amt)) {
- if (supportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) {
- MVT EltVT = VT.getVectorElementType();
- assert(EltVT.bitsLE(MVT::i64) && "Unexpected element type!");
- if (EltVT != MVT::i64 && EltVT.bitsGT(MVT::i32))
- BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, BaseShAmt);
- else if (EltVT.bitsLT(MVT::i32))
- BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
-
- return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, Subtarget, DAG);
- }
- }
-
int BaseShAmtIdx = -1;
if (SDValue BaseShAmt = DAG.getSplatSourceVector(Amt, BaseShAmtIdx)) {
+ if (supportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode))
+ return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, BaseShAmtIdx,
+ Subtarget, DAG);
+
// vXi8 shifts - shift as v8i16 + mask result.
if (((VT == MVT::v16i8 && !Subtarget.canExtendTo512DQ()) ||
(VT == MVT::v32i8 && !Subtarget.canExtendTo512BW()) ||
@@ -30217,11 +30141,13 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
// Attempt to fold as unpack(x,x) << zext(splat(y)):
// rotl(x,y) -> (unpack(x,x) << (y & (bw-1))) >> bw.
// rotr(x,y) -> (unpack(x,x) >> (y & (bw-1))).
- // TODO: Handle vXi16 cases on all targets.
- if (EltSizeInBits == 8 || EltSizeInBits == 32 ||
- (EltSizeInBits == 16 && !Subtarget.hasSSE41())) {
+ if (EltSizeInBits == 8 || EltSizeInBits == 16 || EltSizeInBits == 32) {
int BaseRotAmtIdx = -1;
if (SDValue BaseRotAmt = DAG.getSplatSourceVector(AmtMod, BaseRotAmtIdx)) {
+ if (EltSizeInBits == 16 && Subtarget.hasSSE41()) {
+ unsigned FunnelOpc = IsROTL ? ISD::FSHL : ISD::FSHR;
+ return DAG.getNode(FunnelOpc, DL, VT, R, R, Amt);
+ }
unsigned ShiftX86Opc = IsROTL ? X86ISD::VSHLI : X86ISD::VSRLI;
SDValue Lo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, R, R));
SDValue Hi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, R, R));
@@ -41560,12 +41486,8 @@ bool X86TargetLowering::isSplatValueForTargetNode(SDValue Op,
switch (Opc) {
case X86ISD::VBROADCAST:
case X86ISD::VBROADCAST_LOAD:
- // TODO: Permit vXi64 types on 32-bit targets.
- if (isTypeLegal(Op.getValueType().getVectorElementType())) {
- UndefElts = APInt::getNullValue(NumElts);
- return true;
- }
- return false;
+ UndefElts = APInt::getNullValue(NumElts);
+ return true;
}
return TargetLowering::isSplatValueForTargetNode(Op, DemandedElts, UndefElts,
diff --git a/llvm/test/CodeGen/X86/pr15296.ll b/llvm/test/CodeGen/X86/pr15296.ll
index 8673cfdafad5a..813c67591ae8d 100644
--- a/llvm/test/CodeGen/X86/pr15296.ll
+++ b/llvm/test/CodeGen/X86/pr15296.ll
@@ -62,11 +62,11 @@ allocas:
define <4 x i64> @shiftInput___64in32bitmode(<4 x i64> %input, i64 %shiftval) nounwind {
; X86-LABEL: shiftInput___64in32bitmode:
; X86: # %bb.0: # %allocas
-; X86-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; X86-NEXT: vextractf128 $1, %ymm0, %xmm2
-; X86-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
-; X86-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
-; X86-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
+; X86-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
+; X86-NEXT: vpsrlq %xmm2, %xmm1, %xmm1
+; X86-NEXT: vpsrlq %xmm2, %xmm0, %xmm0
+; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: shiftInput___64in32bitmode:
@@ -87,11 +87,11 @@ allocas:
define <4 x i64> @shiftInput___2x32bitcast(<4 x i64> %input, i32 %shiftval) nounwind {
; X86-LABEL: shiftInput___2x32bitcast:
; X86: # %bb.0: # %allocas
-; X86-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-NEXT: vextractf128 $1, %ymm0, %xmm2
-; X86-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
-; X86-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
-; X86-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
+; X86-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X86-NEXT: vpsrlq %xmm2, %xmm1, %xmm1
+; X86-NEXT: vpsrlq %xmm2, %xmm0, %xmm0
+; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: shiftInput___2x32bitcast:
diff --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll
index 4acd6f4a25ebd..2d4710d55926c 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll
@@ -1156,60 +1156,32 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
}
define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %amt) nounwind {
-; SSE2-LABEL: splatvar_funnnel_v8i16:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
-; SSE2-NEXT: movdqa %xmm2, %xmm4
-; SSE2-NEXT: pandn %xmm3, %xmm4
-; SSE2-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1]
-; SSE2-NEXT: psrldq {{.*#+}} xmm4 = xmm4[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSE2-NEXT: psrlw $1, %xmm1
-; SSE2-NEXT: psrlw %xmm4, %xmm1
-; SSE2-NEXT: pand %xmm3, %xmm2
-; SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1]
-; SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSE2-NEXT: psllw %xmm2, %xmm0
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: splatvar_funnnel_v8i16:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [15,0,0,0]
-; SSE41-NEXT: movdqa %xmm2, %xmm4
-; SSE41-NEXT: pandn %xmm3, %xmm4
-; SSE41-NEXT: psrlw $1, %xmm1
-; SSE41-NEXT: psrlw %xmm4, %xmm1
-; SSE41-NEXT: pand %xmm3, %xmm2
-; SSE41-NEXT: psllw %xmm2, %xmm0
-; SSE41-NEXT: por %xmm1, %xmm0
-; SSE41-NEXT: retq
-;
-; AVX1-LABEL: splatvar_funnnel_v8i16:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,15]
-; AVX1-NEXT: # xmm3 = mem[0,0]
-; AVX1-NEXT: vandnps %xmm3, %xmm2, %xmm4
-; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm1
-; AVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: retq
+; SSE-LABEL: splatvar_funnnel_v8i16:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa {{.*#+}} xmm3 = [15,0,0,0]
+; SSE-NEXT: movdqa %xmm2, %xmm4
+; SSE-NEXT: pandn %xmm3, %xmm4
+; SSE-NEXT: psrlw $1, %xmm1
+; SSE-NEXT: psrlw %xmm4, %xmm1
+; SSE-NEXT: pand %xmm3, %xmm2
+; SSE-NEXT: psllw %xmm2, %xmm0
+; SSE-NEXT: por %xmm1, %xmm0
+; SSE-NEXT: retq
;
-; AVX2-LABEL: splatvar_funnnel_v8i16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
-; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX2-NEXT: vpsrlw $1, %xmm1, %xmm1
-; AVX2-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
-; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX2-NEXT: vpsllw %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
+; AVX-LABEL: splatvar_funnnel_v8i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
+; AVX-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1
+; AVX-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
+; AVX-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vpsllw %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v8i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512F-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX512F-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
@@ -1220,7 +1192,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512VL-LABEL: splatvar_funnnel_v8i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VL-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX512VL-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
@@ -1231,7 +1203,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512BW-LABEL: splatvar_funnnel_v8i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512BW-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX512BW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
@@ -1252,7 +1224,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VLBW-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
@@ -1267,41 +1239,25 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
; AVX512VLVBMI2-NEXT: vpshldvw %xmm2, %xmm1, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
-; XOPAVX1-LABEL: splatvar_funnnel_v8i16:
-; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,15]
-; XOPAVX1-NEXT: # xmm3 = mem[0,0]
-; XOPAVX1-NEXT: vandnps %xmm3, %xmm2, %xmm4
-; XOPAVX1-NEXT: vpsrlw $1, %xmm1, %xmm1
-; XOPAVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
-; XOPAVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
-; XOPAVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm0
-; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
-; XOPAVX1-NEXT: retq
-;
-; XOPAVX2-LABEL: splatvar_funnnel_v8i16:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
-; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; XOPAVX2-NEXT: vpsrlw $1, %xmm1, %xmm1
-; XOPAVX2-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
-; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
-; XOPAVX2-NEXT: vpsllw %xmm2, %xmm0, %xmm0
-; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
-; XOPAVX2-NEXT: retq
+; XOP-LABEL: splatvar_funnnel_v8i16:
+; XOP: # %bb.0:
+; XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
+; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; XOP-NEXT: vpsrlw $1, %xmm1, %xmm1
+; XOP-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
+; XOP-NEXT: vpand %xmm3, %xmm2, %xmm2
+; XOP-NEXT: vpsllw %xmm2, %xmm0, %xmm0
+; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
;
; X86-SSE2-LABEL: splatvar_funnnel_v8i16:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; X86-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [15,0,0,0]
; X86-SSE2-NEXT: movdqa %xmm2, %xmm4
; X86-SSE2-NEXT: pandn %xmm3, %xmm4
-; X86-SSE2-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1]
-; X86-SSE2-NEXT: psrldq {{.*#+}} xmm4 = xmm4[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-SSE2-NEXT: psrlw $1, %xmm1
; X86-SSE2-NEXT: psrlw %xmm4, %xmm1
; X86-SSE2-NEXT: pand %xmm3, %xmm2
-; X86-SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1]
-; X86-SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-SSE2-NEXT: psllw %xmm2, %xmm0
; X86-SSE2-NEXT: por %xmm1, %xmm0
; X86-SSE2-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll
index 9f14cea61cc62..9d14f227f3a89 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll
@@ -981,13 +981,12 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %
define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %amt) nounwind {
; AVX1-LABEL: splatvar_funnnel_v16i16:
; AVX1: # %bb.0:
-; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,15]
-; AVX1-NEXT: # xmm3 = mem[0,0]
-; AVX1-NEXT: vandnps %xmm3, %xmm2, %xmm4
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
+; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
; AVX1-NEXT: vpsrlw $1, %xmm5, %xmm5
; AVX1-NEXT: vpsrlw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpsllw %xmm2, %xmm3, %xmm3
; AVX1-NEXT: vpor %xmm5, %xmm3, %xmm3
@@ -1000,52 +999,44 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX2-LABEL: splatvar_funnnel_v16i16:
; AVX2: # %bb.0:
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX2-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX2-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v16i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v16i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v16i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512BW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: retq
@@ -1061,13 +1052,11 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
@@ -1080,13 +1069,12 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; XOPAVX1-LABEL: splatvar_funnnel_v16i16:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,15]
-; XOPAVX1-NEXT: # xmm3 = mem[0,0]
-; XOPAVX1-NEXT: vandnps %xmm3, %xmm2, %xmm4
+; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
+; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
; XOPAVX1-NEXT: vpsrlw $1, %xmm5, %xmm5
; XOPAVX1-NEXT: vpsrlw %xmm4, %xmm5, %xmm5
-; XOPAVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT: vpsllw %xmm2, %xmm3, %xmm3
; XOPAVX1-NEXT: vpor %xmm5, %xmm3, %xmm3
@@ -1099,13 +1087,11 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; XOPAVX2-LABEL: splatvar_funnnel_v16i16:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; XOPAVX2-NEXT: vpsrlw $1, %ymm1, %ymm1
; XOPAVX2-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
-; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; XOPAVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-fshl-512.ll b/llvm/test/CodeGen/X86/vector-fshl-512.ll
index 6818d6fe379d7..4776f004bef4f 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-512.ll
@@ -544,49 +544,47 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i
define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v32i16:
; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
+; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm5
+; AVX512F-NEXT: vpsrlw $1, %ymm5, %ymm5
+; AVX512F-NEXT: vpsrlw %xmm4, %ymm5, %ymm5
+; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1
+; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
+; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm1
+; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
-; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm4
-; AVX512F-NEXT: vpsllw %xmm4, %ymm3, %ymm3
-; AVX512F-NEXT: vpsllw %xmm4, %ymm0, %ymm0
+; AVX512F-NEXT: vpsllw %xmm2, %ymm3, %ymm3
+; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
-; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm3
-; AVX512F-NEXT: vpsrlw $1, %ymm3, %ymm3
-; AVX512F-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
-; AVX512F-NEXT: vpsrlw %xmm2, %ymm3, %ymm3
-; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1
-; AVX512F-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
-; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v32i16:
; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
+; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm5
+; AVX512VL-NEXT: vpsrlw $1, %ymm5, %ymm5
+; AVX512VL-NEXT: vpsrlw %xmm4, %ymm5, %ymm5
+; AVX512VL-NEXT: vpsrlw $1, %ymm1, %ymm1
+; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm1
+; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm4
-; AVX512VL-NEXT: vpsllw %xmm4, %ymm3, %ymm3
-; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm0
+; AVX512VL-NEXT: vpsllw %xmm2, %ymm3, %ymm3
+; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
-; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm3
-; AVX512VL-NEXT: vpsrlw $1, %ymm3, %ymm3
-; AVX512VL-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
-; AVX512VL-NEXT: vpsrlw %xmm2, %ymm3, %ymm3
-; AVX512VL-NEXT: vpsrlw $1, %ymm1, %ymm1
-; AVX512VL-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw $1, %zmm1, %zmm1
; AVX512BW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
@@ -599,13 +597,11 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw $1, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
index 3bf02ea010382..c2ef8ceaba811 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
@@ -925,75 +925,70 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind
;
; SSE41-LABEL: splatvar_funnnel_v8i16:
; SSE41: # %bb.0:
-; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; SSE41-NEXT: movdqa %xmm0, %xmm3
-; SSE41-NEXT: psllw %xmm2, %xmm3
-; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
-; SSE41-NEXT: psubw %xmm1, %xmm2
-; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
-; SSE41-NEXT: psrlw %xmm1, %xmm0
-; SSE41-NEXT: por %xmm3, %xmm0
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [15,0,0,0]
+; SSE41-NEXT: movdqa %xmm1, %xmm3
+; SSE41-NEXT: pandn %xmm2, %xmm3
+; SSE41-NEXT: movdqa %xmm0, %xmm4
+; SSE41-NEXT: psrlw $1, %xmm4
+; SSE41-NEXT: psrlw %xmm3, %xmm4
+; SSE41-NEXT: pand %xmm2, %xmm1
+; SSE41-NEXT: psllw %xmm1, %xmm0
+; SSE41-NEXT: por %xmm4, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: splatvar_funnnel_v8i16:
; AVX: # %bb.0:
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX-NEXT: vpsllw %xmm2, %xmm0, %xmm2
-; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX-NEXT: vpsrlw $1, %xmm0, %xmm4
+; AVX-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
+; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v8i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512F-NEXT: vpsllw %xmm2, %xmm0, %xmm2
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512F-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512F-NEXT: vpsrlw $1, %xmm0, %xmm4
+; AVX512F-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
+; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512F-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v8i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsllw %xmm2, %xmm0, %xmm2
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512VL-NEXT: vpsrlw $1, %xmm0, %xmm4
+; AVX512VL-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
+; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v8i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512BW-NEXT: vpsllw %xmm2, %xmm0, %xmm2
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512BW-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512BW-NEXT: vpsrlw $1, %xmm0, %xmm4
+; AVX512BW-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
+; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512BW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm0, %xmm2
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VLBW-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512VLBW-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512VLBW-NEXT: vpsrlw $1, %xmm0, %xmm4
+; AVX512VLBW-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
+; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v8i16:
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
index 20660c78ce4f5..2be1d69648b8b 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
@@ -756,79 +756,74 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind
define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind {
; AVX1-LABEL: splatvar_funnnel_v16i16:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX1-NEXT: vpsllw %xmm3, %xmm2, %xmm4
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16]
-; AVX1-NEXT: vpsubw %xmm1, %xmm5, %xmm1
-; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
-; AVX1-NEXT: vpor %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpsllw %xmm3, %xmm0, %xmm3
-; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX1-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT: vpsrlw $1, %xmm4, %xmm5
+; AVX1-NEXT: vpsrlw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpsllw %xmm1, %xmm4, %xmm2
+; AVX1-NEXT: vpor %xmm5, %xmm2, %xmm2
+; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm4
+; AVX1-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_funnnel_v16i16:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm2
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX2-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX2-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX2-NEXT: vpsrlw $1, %ymm0, %ymm4
+; AVX2-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v16i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm2
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm4
+; AVX512F-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
+; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v16i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm2
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512VL-NEXT: vpsrlw $1, %ymm0, %ymm4
+; AVX512VL-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
+; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v16i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm2
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512BW-NEXT: vpsrlw $1, %ymm0, %ymm4
+; AVX512BW-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
+; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm2
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VLBW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512VLBW-NEXT: vpsrlw $1, %ymm0, %ymm4
+; AVX512VLBW-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
+; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v16i16:
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
index 799abb799b7f2..bfea592ecfb80 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
@@ -296,60 +296,58 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %amt) nounw
define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v32i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512F-NEXT: vpsllw %xmm3, %ymm2, %ymm4
-; AVX512F-NEXT: vpsllw %xmm3, %ymm0, %ymm3
-; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16]
-; AVX512F-NEXT: vpsubw %xmm1, %xmm4, %xmm1
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512F-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
-; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,15]
+; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm4
+; AVX512F-NEXT: vpsrlw $1, %ymm4, %ymm5
+; AVX512F-NEXT: vpsrlw %xmm3, %ymm5, %ymm5
+; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm6
+; AVX512F-NEXT: vpsrlw %xmm3, %ymm6, %ymm3
+; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3
+; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512F-NEXT: vpsllw %xmm1, %ymm4, %ymm2
+; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0
+; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v32i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsllw %xmm3, %ymm2, %ymm4
-; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm3
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16]
-; AVX512VL-NEXT: vpsubw %xmm1, %xmm4, %xmm1
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
-; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,15]
+; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm4
+; AVX512VL-NEXT: vpsrlw $1, %ymm4, %ymm5
+; AVX512VL-NEXT: vpsrlw %xmm3, %ymm5, %ymm5
+; AVX512VL-NEXT: vpsrlw $1, %ymm0, %ymm6
+; AVX512VL-NEXT: vpsrlw %xmm3, %ymm6, %ymm3
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3
+; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512VL-NEXT: vpsllw %xmm1, %ymm4, %ymm2
+; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512VL-NEXT: vporq %zmm0, %zmm3, %zmm0
+; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm2
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm4
+; AVX512BW-NEXT: vpsrlw %xmm3, %zmm4, %zmm3
+; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm2
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512VLBW-NEXT: vpsrlw $1, %zmm0, %zmm4
+; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm4, %zmm3
+; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v32i16:
diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll
index aa4d01c859b46..aa490b4966b66 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll
@@ -1249,60 +1249,32 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
}
define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %amt) nounwind {
-; SSE2-LABEL: splatvar_funnnel_v8i16:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
-; SSE2-NEXT: movdqa %xmm2, %xmm4
-; SSE2-NEXT: pand %xmm3, %xmm4
-; SSE2-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1]
-; SSE2-NEXT: psrldq {{.*#+}} xmm4 = xmm4[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSE2-NEXT: psrlw %xmm4, %xmm1
-; SSE2-NEXT: pandn %xmm3, %xmm2
-; SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1]
-; SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSE2-NEXT: psllw $1, %xmm0
-; SSE2-NEXT: psllw %xmm2, %xmm0
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: splatvar_funnnel_v8i16:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [15,0,0,0]
-; SSE41-NEXT: movdqa %xmm2, %xmm4
-; SSE41-NEXT: pand %xmm3, %xmm4
-; SSE41-NEXT: psrlw %xmm4, %xmm1
-; SSE41-NEXT: pandn %xmm3, %xmm2
-; SSE41-NEXT: psllw $1, %xmm0
-; SSE41-NEXT: psllw %xmm2, %xmm0
-; SSE41-NEXT: por %xmm1, %xmm0
-; SSE41-NEXT: retq
-;
-; AVX1-LABEL: splatvar_funnnel_v8i16:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,15]
-; AVX1-NEXT: # xmm3 = mem[0,0]
-; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm4
-; AVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vandnps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpsllw $1, %xmm0, %xmm0
-; AVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: retq
+; SSE-LABEL: splatvar_funnnel_v8i16:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa {{.*#+}} xmm3 = [15,0,0,0]
+; SSE-NEXT: movdqa %xmm2, %xmm4
+; SSE-NEXT: pand %xmm3, %xmm4
+; SSE-NEXT: psrlw %xmm4, %xmm1
+; SSE-NEXT: pandn %xmm3, %xmm2
+; SSE-NEXT: psllw $1, %xmm0
+; SSE-NEXT: psllw %xmm2, %xmm0
+; SSE-NEXT: por %xmm1, %xmm0
+; SSE-NEXT: retq
;
-; AVX2-LABEL: splatvar_funnnel_v8i16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
-; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX2-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
-; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX2-NEXT: vpsllw $1, %xmm0, %xmm0
-; AVX2-NEXT: vpsllw %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
+; AVX-LABEL: splatvar_funnnel_v8i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
+; AVX-NEXT: vpand %xmm3, %xmm2, %xmm4
+; AVX-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
+; AVX-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vpsllw $1, %xmm0, %xmm0
+; AVX-NEXT: vpsllw %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v8i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512F-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2
@@ -1313,7 +1285,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512VL-LABEL: splatvar_funnnel_v8i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512VL-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2
@@ -1324,7 +1296,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512BW-LABEL: splatvar_funnnel_v8i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512BW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
@@ -1345,7 +1317,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512VLBW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
@@ -1361,40 +1333,24 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
; AVX512VLVBMI2-NEXT: vmovdqa %xmm1, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
-; XOPAVX1-LABEL: splatvar_funnnel_v8i16:
-; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,15]
-; XOPAVX1-NEXT: # xmm3 = mem[0,0]
-; XOPAVX1-NEXT: vandps %xmm3, %xmm2, %xmm4
-; XOPAVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
-; XOPAVX1-NEXT: vandnps %xmm3, %xmm2, %xmm2
-; XOPAVX1-NEXT: vpsllw $1, %xmm0, %xmm0
-; XOPAVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm0
-; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
-; XOPAVX1-NEXT: retq
-;
-; XOPAVX2-LABEL: splatvar_funnnel_v8i16:
-; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
-; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
-; XOPAVX2-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
-; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; XOPAVX2-NEXT: vpsllw $1, %xmm0, %xmm0
-; XOPAVX2-NEXT: vpsllw %xmm2, %xmm0, %xmm0
-; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
-; XOPAVX2-NEXT: retq
+; XOP-LABEL: splatvar_funnnel_v8i16:
+; XOP: # %bb.0:
+; XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
+; XOP-NEXT: vpand %xmm3, %xmm2, %xmm4
+; XOP-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
+; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; XOP-NEXT: vpsllw $1, %xmm0, %xmm0
+; XOP-NEXT: vpsllw %xmm2, %xmm0, %xmm0
+; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
;
; X86-SSE2-LABEL: splatvar_funnnel_v8i16:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; X86-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [15,0,0,0]
; X86-SSE2-NEXT: movdqa %xmm2, %xmm4
; X86-SSE2-NEXT: pand %xmm3, %xmm4
-; X86-SSE2-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1]
-; X86-SSE2-NEXT: psrldq {{.*#+}} xmm4 = xmm4[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-SSE2-NEXT: psrlw %xmm4, %xmm1
; X86-SSE2-NEXT: pandn %xmm3, %xmm2
-; X86-SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1]
-; X86-SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-SSE2-NEXT: psllw $1, %xmm0
; X86-SSE2-NEXT: psllw %xmm2, %xmm0
; X86-SSE2-NEXT: por %xmm1, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll
index e24d1d561c21b..32b2d9f835d82 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll
@@ -1014,12 +1014,11 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %
define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %amt) nounwind {
; AVX1-LABEL: splatvar_funnnel_v16i16:
; AVX1: # %bb.0:
-; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,15]
-; AVX1-NEXT: # xmm3 = mem[0,0]
-; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm4
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
; AVX1-NEXT: vpsrlw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vandnps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpsllw $1, %xmm3, %xmm3
; AVX1-NEXT: vpsllw %xmm2, %xmm3, %xmm3
@@ -1033,12 +1032,10 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX2-LABEL: splatvar_funnnel_v16i16:
; AVX2: # %bb.0:
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX2-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX2-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
@@ -1046,12 +1043,10 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX512F-LABEL: splatvar_funnnel_v16i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
@@ -1059,12 +1054,10 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX512VL-LABEL: splatvar_funnnel_v16i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
@@ -1072,12 +1065,10 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX512BW-LABEL: splatvar_funnnel_v16i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
@@ -1094,12 +1085,10 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
@@ -1114,12 +1103,11 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; XOPAVX1-LABEL: splatvar_funnnel_v16i16:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,15]
-; XOPAVX1-NEXT: # xmm3 = mem[0,0]
-; XOPAVX1-NEXT: vandps %xmm3, %xmm2, %xmm4
+; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
+; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
; XOPAVX1-NEXT: vpsrlw %xmm4, %xmm5, %xmm5
-; XOPAVX1-NEXT: vandnps %xmm3, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT: vpsllw $1, %xmm3, %xmm3
; XOPAVX1-NEXT: vpsllw %xmm2, %xmm3, %xmm3
@@ -1133,12 +1121,10 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; XOPAVX2-LABEL: splatvar_funnnel_v16i16:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
-; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; XOPAVX2-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; XOPAVX2-NEXT: vpsllw $1, %ymm0, %ymm0
; XOPAVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
diff --git a/llvm/test/CodeGen/X86/vector-fshr-512.ll b/llvm/test/CodeGen/X86/vector-fshr-512.ll
index 176af60d68706..4501fc0c94931 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-512.ll
@@ -546,15 +546,13 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i
define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v32i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm5
; AVX512F-NEXT: vpsrlw %xmm4, %ymm5, %ymm5
; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm1
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT: vpsllw $1, %ymm3, %ymm3
; AVX512F-NEXT: vpsllw %xmm2, %ymm3, %ymm3
@@ -566,15 +564,13 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
;
; AVX512VL-LABEL: splatvar_funnnel_v32i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm5
; AVX512VL-NEXT: vpsrlw %xmm4, %ymm5, %ymm5
; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm1
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512VL-NEXT: vpsllw $1, %ymm3, %ymm3
; AVX512VL-NEXT: vpsllw %xmm2, %ymm3, %ymm3
@@ -586,12 +582,10 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
;
; AVX512BW-LABEL: splatvar_funnnel_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw $1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
@@ -606,12 +600,10 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsllw $1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
index a1da14c4c941f..566695fd3656c 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
@@ -957,75 +957,70 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind
;
; SSE41-LABEL: splatvar_funnnel_v8i16:
; SSE41: # %bb.0:
-; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; SSE41-NEXT: movdqa %xmm0, %xmm3
-; SSE41-NEXT: psrlw %xmm2, %xmm3
-; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
-; SSE41-NEXT: psubw %xmm1, %xmm2
-; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [15,0,0,0]
+; SSE41-NEXT: movdqa %xmm1, %xmm3
+; SSE41-NEXT: pand %xmm2, %xmm3
+; SSE41-NEXT: movdqa %xmm0, %xmm4
+; SSE41-NEXT: psrlw %xmm3, %xmm4
+; SSE41-NEXT: pandn %xmm2, %xmm1
+; SSE41-NEXT: psllw $1, %xmm0
; SSE41-NEXT: psllw %xmm1, %xmm0
-; SSE41-NEXT: por %xmm3, %xmm0
+; SSE41-NEXT: por %xmm4, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: splatvar_funnnel_v8i16:
; AVX: # %bb.0:
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX-NEXT: vpsrlw %xmm2, %xmm0, %xmm2
-; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX-NEXT: vpsrlw %xmm3, %xmm0, %xmm3
+; AVX-NEXT: vpandn %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpsllw $1, %xmm0, %xmm0
; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v8i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512F-NEXT: vpsrlw %xmm2, %xmm0, %xmm2
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX512F-NEXT: vpsrlw %xmm3, %xmm0, %xmm3
+; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm1
+; AVX512F-NEXT: vpsllw $1, %xmm0, %xmm0
; AVX512F-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512F-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v8i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsrlw %xmm2, %xmm0, %xmm2
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX512VL-NEXT: vpsrlw %xmm3, %xmm0, %xmm3
+; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm1
+; AVX512VL-NEXT: vpsllw $1, %xmm0, %xmm0
; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512VL-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v8i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512BW-NEXT: vpsrlw %xmm2, %xmm0, %xmm2
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX512BW-NEXT: vpsrlw %xmm3, %xmm0, %xmm3
+; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm1
+; AVX512BW-NEXT: vpsllw $1, %xmm0, %xmm0
; AVX512BW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512BW-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VLBW-NEXT: vpsrlw %xmm2, %xmm0, %xmm2
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX512VLBW-NEXT: vpsrlw %xmm3, %xmm0, %xmm3
+; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpsllw $1, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX512VLBW-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512VLBW-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v8i16:
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
index f80ce78b7982b..7bab44e9f78d5 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
@@ -790,79 +790,74 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind
define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind {
; AVX1-LABEL: splatvar_funnnel_v16i16:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX1-NEXT: vpsrlw %xmm3, %xmm2, %xmm4
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16]
-; AVX1-NEXT: vpsubw %xmm1, %xmm5, %xmm1
-; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT: vpsrlw %xmm3, %xmm4, %xmm5
+; AVX1-NEXT: vpandn %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpsllw $1, %xmm4, %xmm2
; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
-; AVX1-NEXT: vpor %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpor %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw %xmm3, %xmm0, %xmm3
+; AVX1-NEXT: vpsllw $1, %xmm0, %xmm0
; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_funnnel_v16i16:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX2-NEXT: vpsrlw %xmm2, %ymm0, %ymm2
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX2-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX2-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
+; AVX2-NEXT: vpandn %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v16i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512F-NEXT: vpsrlw %xmm2, %ymm0, %ymm2
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX512F-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
+; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm1
+; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512F-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v16i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsrlw %xmm2, %ymm0, %ymm2
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX512VL-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
+; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm1
+; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512VL-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v16i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512BW-NEXT: vpsrlw %xmm2, %ymm0, %ymm2
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX512BW-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
+; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm1
+; AVX512BW-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512BW-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VLBW-NEXT: vpsrlw %xmm2, %ymm0, %ymm2
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX512VLBW-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
+; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512VLBW-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v16i16:
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
index a322679fe46a0..3a533f6bc20fd 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
@@ -294,60 +294,58 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %amt) nounw
define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v32i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512F-NEXT: vpsrlw %xmm3, %ymm2, %ymm4
+; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,15]
+; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm4
+; AVX512F-NEXT: vpsrlw %xmm3, %ymm4, %ymm5
; AVX512F-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
-; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16]
-; AVX512F-NEXT: vpsubw %xmm1, %xmm4, %xmm1
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3
+; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm1
+; AVX512F-NEXT: vpsllw $1, %ymm4, %ymm2
; AVX512F-NEXT: vpsllw %xmm1, %ymm2, %ymm2
+; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0
+; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v32i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsrlw %xmm3, %ymm2, %ymm4
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,15]
+; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm4
+; AVX512VL-NEXT: vpsrlw %xmm3, %ymm4, %ymm5
; AVX512VL-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16]
-; AVX512VL-NEXT: vpsubw %xmm1, %xmm4, %xmm1
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3
+; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm1
+; AVX512VL-NEXT: vpsllw $1, %ymm4, %ymm2
; AVX512VL-NEXT: vpsllw %xmm1, %ymm2, %ymm2
+; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512VL-NEXT: vporq %zmm0, %zmm3, %zmm0
+; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512BW-NEXT: vpsrlw %xmm2, %zmm0, %zmm2
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX512BW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3
+; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm1
+; AVX512BW-NEXT: vpsllw $1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VLBW-NEXT: vpsrlw %xmm2, %zmm0, %zmm2
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3
+; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpsllw $1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0
+; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v32i16:
diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll
index 741ce27185e15..f537a9c5429e3 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-128.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll
@@ -914,75 +914,70 @@ define <8 x i16> @splatvar_rotate_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
;
; SSE41-LABEL: splatvar_rotate_v8i16:
; SSE41: # %bb.0:
-; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; SSE41-NEXT: movdqa %xmm0, %xmm3
-; SSE41-NEXT: psllw %xmm2, %xmm3
-; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
-; SSE41-NEXT: psubw %xmm1, %xmm2
-; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
-; SSE41-NEXT: psrlw %xmm1, %xmm0
-; SSE41-NEXT: por %xmm3, %xmm0
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [15,0,0,0]
+; SSE41-NEXT: movdqa %xmm1, %xmm3
+; SSE41-NEXT: pandn %xmm2, %xmm3
+; SSE41-NEXT: movdqa %xmm0, %xmm4
+; SSE41-NEXT: psrlw $1, %xmm4
+; SSE41-NEXT: psrlw %xmm3, %xmm4
+; SSE41-NEXT: pand %xmm2, %xmm1
+; SSE41-NEXT: psllw %xmm1, %xmm0
+; SSE41-NEXT: por %xmm4, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: splatvar_rotate_v8i16:
; AVX: # %bb.0:
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX-NEXT: vpsllw %xmm2, %xmm0, %xmm2
-; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX-NEXT: vpsrlw $1, %xmm0, %xmm4
+; AVX-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
+; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512F-LABEL: splatvar_rotate_v8i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512F-NEXT: vpsllw %xmm2, %xmm0, %xmm2
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512F-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512F-NEXT: vpsrlw $1, %xmm0, %xmm4
+; AVX512F-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
+; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512F-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_rotate_v8i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsllw %xmm2, %xmm0, %xmm2
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512VL-NEXT: vpsrlw $1, %xmm0, %xmm4
+; AVX512VL-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
+; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_rotate_v8i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512BW-NEXT: vpsllw %xmm2, %xmm0, %xmm2
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512BW-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512BW-NEXT: vpsrlw $1, %xmm0, %xmm4
+; AVX512BW-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
+; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512BW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_rotate_v8i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm0, %xmm2
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VLBW-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512VLBW-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512VLBW-NEXT: vpsrlw $1, %xmm0, %xmm4
+; AVX512VLBW-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
+; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_rotate_v8i16:
diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll
index 3591a891f801d..ce452204ebd55 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-256.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll
@@ -577,11 +577,11 @@ define <4 x i64> @splatvar_rotate_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; AVX2-LABEL: splatvar_rotate_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [64,64]
-; AVX2-NEXT: vpsubq %xmm1, %xmm2, %xmm2
-; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm1
-; AVX2-NEXT: vpsrlq %xmm2, %ymm0, %ymm0
-; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm2
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [64,64]
+; AVX2-NEXT: vpsubq %xmm1, %xmm3, %xmm1
+; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_rotate_v4i64:
@@ -749,79 +749,74 @@ define <8 x i32> @splatvar_rotate_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
define <16 x i16> @splatvar_rotate_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: splatvar_rotate_v16i16:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX1-NEXT: vpsllw %xmm3, %xmm2, %xmm4
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16]
-; AVX1-NEXT: vpsubw %xmm1, %xmm5, %xmm1
-; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
-; AVX1-NEXT: vpor %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpsllw %xmm3, %xmm0, %xmm3
-; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX1-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT: vpsrlw $1, %xmm4, %xmm5
+; AVX1-NEXT: vpsrlw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpsllw %xmm1, %xmm4, %xmm2
+; AVX1-NEXT: vpor %xmm5, %xmm2, %xmm2
+; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm4
+; AVX1-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_rotate_v16i16:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm2
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX2-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX2-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX2-NEXT: vpsrlw $1, %ymm0, %ymm4
+; AVX2-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_rotate_v16i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm2
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm4
+; AVX512F-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
+; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_rotate_v16i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm2
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512VL-NEXT: vpsrlw $1, %ymm0, %ymm4
+; AVX512VL-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
+; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_rotate_v16i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm2
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512BW-NEXT: vpsrlw $1, %ymm0, %ymm4
+; AVX512BW-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
+; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_rotate_v16i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm2
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VLBW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512VLBW-NEXT: vpsrlw $1, %ymm0, %ymm4
+; AVX512VLBW-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
+; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_rotate_v16i16:
diff --git a/llvm/test/CodeGen/X86/vector-rotate-512.ll b/llvm/test/CodeGen/X86/vector-rotate-512.ll
index cdc4aa7b75b0d..4402957b4d74f 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-512.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-512.ll
@@ -309,60 +309,58 @@ define <16 x i32> @splatvar_rotate_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind
define <32 x i16> @splatvar_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512F-LABEL: splatvar_rotate_v32i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512F-NEXT: vpsllw %xmm3, %ymm2, %ymm4
-; AVX512F-NEXT: vpsllw %xmm3, %ymm0, %ymm3
-; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16]
-; AVX512F-NEXT: vpsubw %xmm1, %xmm4, %xmm1
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512F-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
-; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,15]
+; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm4
+; AVX512F-NEXT: vpsrlw $1, %ymm4, %ymm5
+; AVX512F-NEXT: vpsrlw %xmm3, %ymm5, %ymm5
+; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm6
+; AVX512F-NEXT: vpsrlw %xmm3, %ymm6, %ymm3
+; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3
+; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512F-NEXT: vpsllw %xmm1, %ymm4, %ymm2
+; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0
+; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_rotate_v32i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsllw %xmm3, %ymm2, %ymm4
-; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm3
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16]
-; AVX512VL-NEXT: vpsubw %xmm1, %xmm4, %xmm1
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
-; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,15]
+; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm4
+; AVX512VL-NEXT: vpsrlw $1, %ymm4, %ymm5
+; AVX512VL-NEXT: vpsrlw %xmm3, %ymm5, %ymm5
+; AVX512VL-NEXT: vpsrlw $1, %ymm0, %ymm6
+; AVX512VL-NEXT: vpsrlw %xmm3, %ymm6, %ymm3
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3
+; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512VL-NEXT: vpsllw %xmm1, %ymm4, %ymm2
+; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512VL-NEXT: vporq %zmm0, %zmm3, %zmm0
+; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_rotate_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm2
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm4
+; AVX512BW-NEXT: vpsrlw %xmm3, %zmm4, %zmm3
+; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_rotate_v32i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm2
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
+; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512VLBW-NEXT: vpsrlw $1, %zmm0, %zmm4
+; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm4, %zmm3
+; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_rotate_v32i16:
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
index e66d3933333a4..821a9803bed78 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
@@ -992,19 +992,11 @@ define <2 x i64> @splatvar_modulo_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwi
}
define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
-; SSE2-LABEL: splatvar_modulo_shift_v4i32:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: andl $31, %eax
-; SSE2-NEXT: movd %eax, %xmm1
-; SSE2-NEXT: psrad %xmm1, %xmm0
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: splatvar_modulo_shift_v4i32:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT: psrad %xmm1, %xmm0
-; SSE41-NEXT: retq
+; SSE-LABEL: splatvar_modulo_shift_v4i32:
+; SSE: # %bb.0:
+; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE-NEXT: psrad %xmm1, %xmm0
+; SSE-NEXT: retq
;
; AVX-LABEL: splatvar_modulo_shift_v4i32:
; AVX: # %bb.0:
@@ -1032,9 +1024,7 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
;
; X86-SSE-LABEL: splatvar_modulo_shift_v4i32:
; X86-SSE: # %bb.0:
-; X86-SSE-NEXT: movd %xmm1, %eax
-; X86-SSE-NEXT: andl $31, %eax
-; X86-SSE-NEXT: movd %eax, %xmm1
+; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE-NEXT: psrad %xmm1, %xmm0
; X86-SSE-NEXT: retl
%mod = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
@@ -1044,19 +1034,11 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
}
define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
-; SSE2-LABEL: splatvar_modulo_shift_v8i16:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
-; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSE2-NEXT: psraw %xmm1, %xmm0
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: splatvar_modulo_shift_v8i16:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT: psraw %xmm1, %xmm0
-; SSE41-NEXT: retq
+; SSE-LABEL: splatvar_modulo_shift_v8i16:
+; SSE: # %bb.0:
+; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE-NEXT: psraw %xmm1, %xmm0
+; SSE-NEXT: retq
;
; AVX-LABEL: splatvar_modulo_shift_v8i16:
; AVX: # %bb.0:
@@ -1085,8 +1067,6 @@ define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwi
; X86-SSE-LABEL: splatvar_modulo_shift_v8i16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
-; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-SSE-NEXT: psraw %xmm1, %xmm0
; X86-SSE-NEXT: retl
%mod = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
index 902547a7b2892..53e5fa4e1f69b 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -737,8 +737,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -752,8 +752,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; XOPAVX1-LABEL: splatvar_shift_v8i32:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -779,8 +779,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; X86-AVX1-LABEL: splatvar_shift_v8i32:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -799,8 +799,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v16i16:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -814,8 +814,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
;
; XOPAVX1-LABEL: splatvar_shift_v16i16:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -841,8 +841,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
;
; X86-AVX1-LABEL: splatvar_shift_v16i16:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -1099,9 +1099,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: splatvar_modulo_shift_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -1109,17 +1108,14 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; AVX2-LABEL: splatvar_modulo_shift_v8i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v8i32:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -1127,32 +1123,26 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; XOPAVX2-LABEL: splatvar_modulo_shift_v8i32:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v8i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vpsrad %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v8i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
-; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpsrad %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v8i32:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
-; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -1160,9 +1150,7 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; X86-AVX2-LABEL: splatvar_modulo_shift_v8i32:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; X86-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
@@ -1174,9 +1162,8 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: splatvar_modulo_shift_v16i16:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -1185,15 +1172,13 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; AVX2-LABEL: splatvar_modulo_shift_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v16i16:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -1202,29 +1187,25 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; XOPAVX2-LABEL: splatvar_modulo_shift_v16i16:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v16i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v16i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v16i16:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
-; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -1233,7 +1214,6 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; X86-AVX2-LABEL: splatvar_modulo_shift_v16i16:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
-; X86-AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <16 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
@@ -2210,7 +2190,8 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
;
; X86-AVX2-LABEL: PR52719:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-AVX2-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %xmm1
+; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
index baba294d648b4..b1e6c739fac2d 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
@@ -169,8 +169,8 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind
define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v32i16:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512DQ-NEXT: vpsraw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
@@ -245,9 +245,7 @@ define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwi
define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_modulo_shift_v16i32:
; ALL: # %bb.0:
-; ALL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
-; ALL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; ALL-NEXT: vpsrad %xmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%mod = and <16 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
@@ -259,9 +257,8 @@ define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) no
define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_modulo_shift_v32i16:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT: vpsraw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
@@ -270,7 +267,6 @@ define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) no
; AVX512BW-LABEL: splatvar_modulo_shift_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%mod = and <32 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
index e17e721ac4fef..6b5a76f2a0e47 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
@@ -811,19 +811,11 @@ define <2 x i64> @splatvar_modulo_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwi
}
define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
-; SSE2-LABEL: splatvar_modulo_shift_v4i32:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: andl $31, %eax
-; SSE2-NEXT: movd %eax, %xmm1
-; SSE2-NEXT: psrld %xmm1, %xmm0
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: splatvar_modulo_shift_v4i32:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT: psrld %xmm1, %xmm0
-; SSE41-NEXT: retq
+; SSE-LABEL: splatvar_modulo_shift_v4i32:
+; SSE: # %bb.0:
+; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE-NEXT: psrld %xmm1, %xmm0
+; SSE-NEXT: retq
;
; AVX-LABEL: splatvar_modulo_shift_v4i32:
; AVX: # %bb.0:
@@ -851,9 +843,7 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
;
; X86-SSE-LABEL: splatvar_modulo_shift_v4i32:
; X86-SSE: # %bb.0:
-; X86-SSE-NEXT: movd %xmm1, %eax
-; X86-SSE-NEXT: andl $31, %eax
-; X86-SSE-NEXT: movd %eax, %xmm1
+; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE-NEXT: psrld %xmm1, %xmm0
; X86-SSE-NEXT: retl
%mod = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
@@ -863,19 +853,11 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
}
define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
-; SSE2-LABEL: splatvar_modulo_shift_v8i16:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
-; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSE2-NEXT: psrlw %xmm1, %xmm0
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: splatvar_modulo_shift_v8i16:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT: psrlw %xmm1, %xmm0
-; SSE41-NEXT: retq
+; SSE-LABEL: splatvar_modulo_shift_v8i16:
+; SSE: # %bb.0:
+; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE-NEXT: psrlw %xmm1, %xmm0
+; SSE-NEXT: retq
;
; AVX-LABEL: splatvar_modulo_shift_v8i16:
; AVX: # %bb.0:
@@ -904,8 +886,6 @@ define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwi
; X86-SSE-LABEL: splatvar_modulo_shift_v8i16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
-; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-SSE-NEXT: psrlw %xmm1, %xmm0
; X86-SSE-NEXT: retl
%mod = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
index 921be75d4ab88..a36d484319312 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
@@ -580,8 +580,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -595,8 +595,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; XOPAVX1-LABEL: splatvar_shift_v8i32:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -622,8 +622,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; X86-AVX1-LABEL: splatvar_shift_v8i32:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -642,8 +642,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v16i16:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -657,8 +657,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
;
; XOPAVX1-LABEL: splatvar_shift_v16i16:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -684,8 +684,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
;
; X86-AVX1-LABEL: splatvar_shift_v16i16:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -823,8 +823,8 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: splatvar_modulo_shift_v4i64:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -838,8 +838,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v4i64:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -865,8 +865,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v4i64:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -886,9 +886,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: splatvar_modulo_shift_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -896,17 +895,14 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; AVX2-LABEL: splatvar_modulo_shift_v8i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v8i32:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -914,32 +910,26 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; XOPAVX2-LABEL: splatvar_modulo_shift_v8i32:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v8i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vpsrld %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v8i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
-; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpsrld %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v8i32:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
-; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -947,9 +937,7 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; X86-AVX2-LABEL: splatvar_modulo_shift_v8i32:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; X86-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
@@ -961,9 +949,8 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: splatvar_modulo_shift_v16i16:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -972,15 +959,13 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; AVX2-LABEL: splatvar_modulo_shift_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v16i16:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -989,29 +974,25 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; XOPAVX2-LABEL: splatvar_modulo_shift_v16i16:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v16i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v16i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v16i16:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
-; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -1020,7 +1001,6 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; X86-AVX2-LABEL: splatvar_modulo_shift_v16i16:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
-; X86-AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <16 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
index 289201da0259f..fa4575dd54e86 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
@@ -133,8 +133,8 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind
define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v32i16:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
@@ -200,9 +200,7 @@ define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwi
define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_modulo_shift_v16i32:
; ALL: # %bb.0:
-; ALL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
-; ALL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; ALL-NEXT: vpsrld %xmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%mod = and <16 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
@@ -214,9 +212,8 @@ define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) no
define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_modulo_shift_v32i16:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
@@ -225,7 +222,6 @@ define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) no
; AVX512BW-LABEL: splatvar_modulo_shift_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%mod = and <32 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
index f063bcadd5c77..73e2b29fb3d82 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
@@ -718,19 +718,11 @@ define <2 x i64> @splatvar_modulo_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwi
}
define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
-; SSE2-LABEL: splatvar_modulo_shift_v4i32:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: andl $31, %eax
-; SSE2-NEXT: movd %eax, %xmm1
-; SSE2-NEXT: pslld %xmm1, %xmm0
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: splatvar_modulo_shift_v4i32:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT: pslld %xmm1, %xmm0
-; SSE41-NEXT: retq
+; SSE-LABEL: splatvar_modulo_shift_v4i32:
+; SSE: # %bb.0:
+; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE-NEXT: pslld %xmm1, %xmm0
+; SSE-NEXT: retq
;
; AVX-LABEL: splatvar_modulo_shift_v4i32:
; AVX: # %bb.0:
@@ -758,9 +750,7 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
;
; X86-SSE-LABEL: splatvar_modulo_shift_v4i32:
; X86-SSE: # %bb.0:
-; X86-SSE-NEXT: movd %xmm1, %eax
-; X86-SSE-NEXT: andl $31, %eax
-; X86-SSE-NEXT: movd %eax, %xmm1
+; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE-NEXT: pslld %xmm1, %xmm0
; X86-SSE-NEXT: retl
%mod = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
@@ -770,19 +760,11 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
}
define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
-; SSE2-LABEL: splatvar_modulo_shift_v8i16:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
-; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSE2-NEXT: psllw %xmm1, %xmm0
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: splatvar_modulo_shift_v8i16:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT: psllw %xmm1, %xmm0
-; SSE41-NEXT: retq
+; SSE-LABEL: splatvar_modulo_shift_v8i16:
+; SSE: # %bb.0:
+; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE-NEXT: psllw %xmm1, %xmm0
+; SSE-NEXT: retq
;
; AVX-LABEL: splatvar_modulo_shift_v8i16:
; AVX: # %bb.0:
@@ -811,8 +793,6 @@ define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwi
; X86-SSE-LABEL: splatvar_modulo_shift_v8i16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
-; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-SSE-NEXT: psllw %xmm1, %xmm0
; X86-SSE-NEXT: retl
%mod = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
index 40450b93c88a9..cfb4fef06fd81 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
@@ -510,8 +510,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -525,8 +525,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; XOPAVX1-LABEL: splatvar_shift_v8i32:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -552,8 +552,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; X86-AVX1-LABEL: splatvar_shift_v8i32:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -572,8 +572,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v16i16:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -587,8 +587,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
;
; XOPAVX1-LABEL: splatvar_shift_v16i16:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -614,8 +614,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
;
; X86-AVX1-LABEL: splatvar_shift_v16i16:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -748,8 +748,8 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: splatvar_modulo_shift_v4i64:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpsllq %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -763,8 +763,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v4i64:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpsllq %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -790,8 +790,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v4i64:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpsllq %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -811,9 +811,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: splatvar_modulo_shift_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -821,17 +820,14 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; AVX2-LABEL: splatvar_modulo_shift_v8i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v8i32:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -839,32 +835,26 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; XOPAVX2-LABEL: splatvar_modulo_shift_v8i32:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v8i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vpslld %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v8i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
-; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpslld %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v8i32:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
-; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -872,9 +862,7 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; X86-AVX2-LABEL: splatvar_modulo_shift_v8i32:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; X86-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
@@ -886,9 +874,8 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: splatvar_modulo_shift_v16i16:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -897,15 +884,13 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; AVX2-LABEL: splatvar_modulo_shift_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v16i16:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -914,29 +899,25 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; XOPAVX2-LABEL: splatvar_modulo_shift_v16i16:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v16i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v16i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v16i16:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
-; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -945,7 +926,6 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; X86-AVX2-LABEL: splatvar_modulo_shift_v16i16:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
-; X86-AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <16 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-512.ll b/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
index c65492dcfe042..04b04ed3f1d2f 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
@@ -128,8 +128,8 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind
define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v32i16:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
@@ -193,9 +193,7 @@ define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwi
define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_modulo_shift_v16i32:
; ALL: # %bb.0:
-; ALL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
-; ALL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; ALL-NEXT: vpslld %xmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%mod = and <16 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
@@ -207,9 +205,8 @@ define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) no
define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_modulo_shift_v32i16:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
@@ -218,7 +215,6 @@ define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) no
; AVX512BW-LABEL: splatvar_modulo_shift_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%mod = and <32 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
diff --git a/llvm/test/CodeGen/X86/vselect-avx.ll b/llvm/test/CodeGen/X86/vselect-avx.ll
index 795a529650674..f8072ff00a135 100644
--- a/llvm/test/CodeGen/X86/vselect-avx.ll
+++ b/llvm/test/CodeGen/X86/vselect-avx.ll
@@ -162,11 +162,11 @@ define <32 x i8> @PR22706(<32 x i1> %x) {
define void @blendv_split(<8 x i32>* %p, <8 x i32> %cond, <8 x i32> %a, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z, <8 x i32> %w) {
; AVX1-LABEL: blendv_split:
; AVX1: ## %bb.0:
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
; AVX1-NEXT: vpslld %xmm2, %xmm4, %xmm5
; AVX1-NEXT: vpslld %xmm2, %xmm1, %xmm2
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero
; AVX1-NEXT: vpslld %xmm3, %xmm4, %xmm4
; AVX1-NEXT: vpslld %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm1
@@ -180,8 +180,8 @@ define void @blendv_split(<8 x i32>* %p, <8 x i32> %cond, <8 x i32> %a, <8 x i32
; AVX2-LABEL: blendv_split:
; AVX2: ## %bb.0:
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero
; AVX2-NEXT: vpslld %xmm2, %ymm1, %ymm2
+; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero
; AVX2-NEXT: vpslld %xmm3, %ymm1, %ymm1
; AVX2-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT: vmovups %ymm0, (%rdi)
More information about the llvm-commits
mailing list