[llvm] e41a138 - [X86] LowerShiftByScalarVariable - use getSplatSourceVector for vXi8 shift expansion
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 24 03:24:19 PST 2022
Author: Simon Pilgrim
Date: 2022-02-24T11:24:06Z
New Revision: e41a138520b424a15cf4f2d56cf9bb6fbb14ae09
URL: https://github.com/llvm/llvm-project/commit/e41a138520b424a15cf4f2d56cf9bb6fbb14ae09
DIFF: https://github.com/llvm/llvm-project/commit/e41a138520b424a15cf4f2d56cf9bb6fbb14ae09.diff
LOG: [X86] LowerShiftByScalarVariable - use getSplatSourceVector for vXi8 shift expansion
Using getSplatValue causes poor codegen due to not always being able to remove the EXTRACT_VECTOR_ELT created inside getSplatValue.
The vXi16 shifts/rotates are still showing occasional regressions but vXi8 is a definite improvement.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
llvm/test/CodeGen/X86/vector-shift-shl-128.ll
llvm/test/CodeGen/X86/vector-shift-shl-256.ll
llvm/test/CodeGen/X86/vector-shift-shl-512.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0fc2b164fb0bc..c320259327804 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -29304,6 +29304,7 @@ static SDValue LowerShiftByScalarVariable(SDValue Op, SelectionDAG &DAG,
unsigned X86OpcI = getTargetVShiftUniformOpcode(Opcode, false);
unsigned X86OpcV = getTargetVShiftUniformOpcode(Opcode, true);
+ // TODO: Use getSplatSourceVector.
if (SDValue BaseShAmt = DAG.getSplatValue(Amt)) {
if (supportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) {
MVT EltVT = VT.getVectorElementType();
@@ -29315,7 +29316,10 @@ static SDValue LowerShiftByScalarVariable(SDValue Op, SelectionDAG &DAG,
return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, Subtarget, DAG);
}
+ }
+ int BaseShAmtIdx = -1;
+ if (SDValue BaseShAmt = DAG.getSplatSourceVector(Amt, BaseShAmtIdx)) {
// vXi8 shifts - shift as v8i16 + mask result.
if (((VT == MVT::v16i8 && !Subtarget.canExtendTo512DQ()) ||
(VT == MVT::v32i8 && !Subtarget.canExtendTo512BW()) ||
@@ -29326,13 +29330,12 @@ static SDValue LowerShiftByScalarVariable(SDValue Op, SelectionDAG &DAG,
if (supportedVectorShiftWithBaseAmnt(ExtVT, Subtarget, Opcode)) {
unsigned LogicalOp = (Opcode == ISD::SHL ? ISD::SHL : ISD::SRL);
unsigned LogicalX86Op = getTargetVShiftUniformOpcode(LogicalOp, false);
- BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
// Create the mask using vXi16 shifts. For shift-rights we need to move
// the upper byte down before splatting the vXi8 mask.
SDValue BitMask = DAG.getConstant(-1, dl, ExtVT);
BitMask = getTargetVShiftNode(LogicalX86Op, dl, ExtVT, BitMask,
- BaseShAmt, Subtarget, DAG);
+ BaseShAmt, BaseShAmtIdx, Subtarget, DAG);
if (Opcode != ISD::SHL)
BitMask = getTargetVShiftByConstNode(LogicalX86Op, dl, ExtVT, BitMask,
8, DAG);
@@ -29342,7 +29345,7 @@ static SDValue LowerShiftByScalarVariable(SDValue Op, SelectionDAG &DAG,
SDValue Res = getTargetVShiftNode(LogicalX86Op, dl, ExtVT,
DAG.getBitcast(ExtVT, R), BaseShAmt,
- Subtarget, DAG);
+ BaseShAmtIdx, Subtarget, DAG);
Res = DAG.getBitcast(VT, Res);
Res = DAG.getNode(ISD::AND, dl, VT, Res, BitMask);
@@ -29350,8 +29353,9 @@ static SDValue LowerShiftByScalarVariable(SDValue Op, SelectionDAG &DAG,
// ashr(R, Amt) === sub(xor(lshr(R, Amt), SignMask), SignMask)
// SignMask = lshr(SignBit, Amt) - safe to do this with PSRLW.
SDValue SignMask = DAG.getConstant(0x8080, dl, ExtVT);
- SignMask = getTargetVShiftNode(LogicalX86Op, dl, ExtVT, SignMask,
- BaseShAmt, Subtarget, DAG);
+ SignMask =
+ getTargetVShiftNode(LogicalX86Op, dl, ExtVT, SignMask, BaseShAmt,
+ BaseShAmtIdx, Subtarget, DAG);
SignMask = DAG.getBitcast(VT, SignMask);
Res = DAG.getNode(ISD::XOR, dl, VT, Res, SignMask);
Res = DAG.getNode(ISD::SUB, dl, VT, Res, SignMask);
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
index 27f26e91284ca..e66d3933333a4 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
@@ -1099,8 +1099,6 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
; SSE2-LABEL: splatvar_modulo_shift_v16i8:
; SSE2: # %bb.0:
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
-; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: psrlw %xmm1, %xmm0
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: psrlw %xmm1, %xmm2
@@ -1220,8 +1218,6 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
; X86-SSE-LABEL: splatvar_modulo_shift_v16i8:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
-; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-SSE-NEXT: psrlw %xmm1, %xmm0
; X86-SSE-NEXT: pcmpeqd %xmm2, %xmm2
; X86-SSE-NEXT: psrlw %xmm1, %xmm2
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
index 25355a06163ee..322bc7efe4b41 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -861,8 +861,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v32i8:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpsrlw %xmm1, %xmm3, %xmm3
@@ -963,8 +963,8 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
;
; X86-AVX1-LABEL: splatvar_shift_v32i8:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm3, %xmm3
@@ -1245,9 +1245,8 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: splatvar_modulo_shift_v32i8:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpsrlw %xmm1, %xmm3, %xmm3
@@ -1267,7 +1266,6 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
; AVX2-LABEL: splatvar_modulo_shift_v32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
@@ -1307,7 +1305,6 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
; AVX512DQ-LABEL: splatvar_modulo_shift_v32i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512DQ-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
@@ -1332,7 +1329,6 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
; AVX512DQVL-LABEL: splatvar_modulo_shift_v32i8:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512DQVL-NEXT: vmovdqa {{.*#+}} ymm2 = [32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896]
; AVX512DQVL-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
@@ -1355,9 +1351,8 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v32i8:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
-; X86-AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm3, %xmm3
@@ -1377,7 +1372,6 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
; X86-AVX2-LABEL: splatvar_modulo_shift_v32i8:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
-; X86-AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; X86-AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; X86-AVX2-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
index 832a613a0db94..baba294d648b4 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
@@ -189,8 +189,8 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind
define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v64i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX512DQ-NEXT: vpsrlw %xmm1, %xmm3, %xmm3
@@ -282,9 +282,8 @@ define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) no
define <64 x i8> @splatvar_modulo_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: splatvar_modulo_shift_v64i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX512DQ-NEXT: vpsrlw %xmm1, %xmm3, %xmm3
@@ -305,7 +304,6 @@ define <64 x i8> @splatvar_modulo_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwi
; AVX512BW-LABEL: splatvar_modulo_shift_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896]
; AVX512BW-NEXT: vpsrlw %xmm1, %zmm2, %zmm2
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
index cb9e1341e0967..e17e721ac4fef 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
@@ -918,8 +918,6 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
; SSE2-LABEL: splatvar_modulo_shift_v16i8:
; SSE2: # %bb.0:
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
-; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: psrlw %xmm1, %xmm0
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: psrlw %xmm1, %xmm2
@@ -1023,8 +1021,6 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
; X86-SSE-LABEL: splatvar_modulo_shift_v16i8:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
-; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-SSE-NEXT: psrlw %xmm1, %xmm0
; X86-SSE-NEXT: pcmpeqd %xmm2, %xmm2
; X86-SSE-NEXT: psrlw %xmm1, %xmm2
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
index 6a0c479ec2399..921be75d4ab88 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
@@ -704,8 +704,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v32i8:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpsrlw %xmm1, %xmm3, %xmm3
@@ -789,8 +789,8 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
;
; X86-AVX1-LABEL: splatvar_shift_v32i8:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm3, %xmm3
@@ -1032,9 +1032,8 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: splatvar_modulo_shift_v32i8:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpsrlw %xmm1, %xmm3, %xmm3
@@ -1048,7 +1047,6 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
; AVX2-LABEL: splatvar_modulo_shift_v32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
@@ -1084,7 +1082,6 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
; AVX512DQ-LABEL: splatvar_modulo_shift_v32i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512DQ-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
@@ -1105,7 +1102,6 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
; AVX512DQVL-LABEL: splatvar_modulo_shift_v32i8:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512DQVL-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
@@ -1125,9 +1121,8 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v32i8:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
-; X86-AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm3, %xmm3
@@ -1141,7 +1136,6 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
; X86-AVX2-LABEL: splatvar_modulo_shift_v32i8:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
-; X86-AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; X86-AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; X86-AVX2-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
index b84752c025840..289201da0259f 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
@@ -153,8 +153,8 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind
define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v64i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
@@ -237,8 +237,8 @@ define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) no
define <64 x i8> @splatvar_modulo_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: splatvar_modulo_shift_v64i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
@@ -253,7 +253,6 @@ define <64 x i8> @splatvar_modulo_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwi
; AVX512BW-LABEL: splatvar_modulo_shift_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
index 24cfc6fe385f5..f063bcadd5c77 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
@@ -825,8 +825,6 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
; SSE2-LABEL: splatvar_modulo_shift_v16i8:
; SSE2: # %bb.0:
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
-; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: psllw %xmm1, %xmm0
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: psllw %xmm1, %xmm2
@@ -927,8 +925,6 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
; X86-SSE-LABEL: splatvar_modulo_shift_v16i8:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
-; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-SSE-NEXT: psllw %xmm1, %xmm0
; X86-SSE-NEXT: pcmpeqd %xmm2, %xmm2
; X86-SSE-NEXT: psllw %xmm1, %xmm2
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
index d6aac50ffdbc2..40450b93c88a9 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
@@ -634,8 +634,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v32i8:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpsllw %xmm1, %xmm3, %xmm3
@@ -714,8 +714,8 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
;
; X86-AVX1-LABEL: splatvar_shift_v32i8:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; X86-AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; X86-AVX1-NEXT: vpsllw %xmm1, %xmm3, %xmm3
@@ -957,9 +957,8 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: splatvar_modulo_shift_v32i8:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpsllw %xmm1, %xmm3, %xmm3
@@ -974,7 +973,6 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
; AVX2-LABEL: splatvar_modulo_shift_v32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpsllw %xmm1, %xmm2, %xmm1
@@ -1006,7 +1004,6 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
; AVX512DQ-LABEL: splatvar_modulo_shift_v32i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512DQ-NEXT: vpsllw %xmm1, %xmm2, %xmm1
@@ -1026,7 +1023,6 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
; AVX512DQVL-LABEL: splatvar_modulo_shift_v32i8:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512DQVL-NEXT: vpsllw %xmm1, %xmm2, %xmm1
@@ -1045,9 +1041,8 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v32i8:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
-; X86-AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; X86-AVX1-NEXT: vpsllw %xmm1, %xmm3, %xmm3
@@ -1062,7 +1057,6 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
; X86-AVX2-LABEL: splatvar_modulo_shift_v32i8:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
-; X86-AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; X86-AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; X86-AVX2-NEXT: vpsllw %xmm1, %xmm2, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-512.ll b/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
index d2998938e280b..c65492dcfe042 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
@@ -148,8 +148,8 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind
define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v64i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
@@ -230,8 +230,8 @@ define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) no
define <64 x i8> @splatvar_modulo_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: splatvar_modulo_shift_v64i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
@@ -245,7 +245,6 @@ define <64 x i8> @splatvar_modulo_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwi
; AVX512BW-LABEL: splatvar_modulo_shift_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT: vpsllw %xmm1, %xmm2, %xmm1
More information about the llvm-commits
mailing list