[llvm] 0b91de5 - [X86] Add X86FixupVectorConstantsPass to re-fold AVX512 vector load folds as broadcast folds
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue May 23 03:01:20 PDT 2023
Author: Simon Pilgrim
Date: 2023-05-23T10:58:17+01:00
New Revision: 0b91de5ea32d40a25c609bf155426fea12c1e2fb
URL: https://github.com/llvm/llvm-project/commit/0b91de5ea32d40a25c609bf155426fea12c1e2fb
DIFF: https://github.com/llvm/llvm-project/commit/0b91de5ea32d40a25c609bf155426fea12c1e2fb.diff
LOG: [X86] Add X86FixupVectorConstantsPass to re-fold AVX512 vector load folds as broadcast folds
This patch analyzes AVX512 instructions for full vector width folded loads from the constant pool and attempts to determine whether they can be replaced with smaller broadcast folded variants. Typically these broadcast opportunities were missed due to type-width mismatches or multiuse limitations which have since been resolved by later passes.
As well as introducing broadcast fold tables (which can hopefully be extended/automated in the future), this also handles mismatches in the AND/ANDN/OR/XOR/TERNLOG type-widths, catching additional missed opportunities.
This patch is pulled from the ongoing work based on D150143, but without removing the existing DAG constant broadcast lowering code - for now this patch is a late-stage cleanup only.
The intention is to add additional broadcast/extension handling of constants in future patches, but it turned out that AVX512 broadcast handling was the easiest to start with.
Differential Revision: https://reviews.llvm.org/D150526
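
To illustrate the core idea, here is a minimal, self-contained C++ sketch (not part of the patch; all names are illustrative) of the splat check that makes the rewrite legal: when every 32-bit or 64-bit chunk of a full-width constant repeats the same value, the full constant-pool load can be replaced by a single scalar entry folded through an EVEX {1toN} broadcast, which is exactly what the updated test checks below show. The pass itself performs the equivalent check on the constant's bit data (getSplatableConstant) and then remaps the opcode through the new broadcast fold tables.

// Standalone sketch only - assumes raw little-endian constant bytes.
#include <cstdint>
#include <iostream>
#include <vector>

// Return true if every SplatBytes-sized chunk of Data repeats the first chunk.
static bool isSplat(const std::vector<uint8_t> &Data, size_t SplatBytes) {
  if (SplatBytes == 0 || Data.size() % SplatBytes != 0)
    return false;
  for (size_t I = SplatBytes; I != Data.size(); ++I)
    if (Data[I] != Data[I % SplatBytes])
      return false;
  return true;
}

int main() {
  // 64 bytes (a ZMM-width constant) holding the <16 x i32> <255,...> mask
  // seen in several of the updated tests, stored little-endian.
  std::vector<uint8_t> Mask(64, 0);
  for (size_t I = 0; I < Mask.size(); I += 4)
    Mask[I] = 0xFF;

  std::cout << "32-bit splat: " << isSplat(Mask, 4) << "\n"; // 1 -> {1to16}
  std::cout << "64-bit splat: " << isSplat(Mask, 8) << "\n"; // 1 -> {1to8}
  std::cout << "16-bit splat: " << isSplat(Mask, 2) << "\n"; // 0
  return 0;
}
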
Added:
llvm/lib/Target/X86/X86FixupVectorConstants.cpp
Modified:
llvm/lib/Target/X86/CMakeLists.txt
llvm/lib/Target/X86/X86.h
llvm/lib/Target/X86/X86InstrFoldTables.cpp
llvm/lib/Target/X86/X86InstrFoldTables.h
llvm/lib/Target/X86/X86TargetMachine.cpp
llvm/test/CodeGen/X86/avx512-calling-conv.ll
llvm/test/CodeGen/X86/avx512-ext.ll
llvm/test/CodeGen/X86/avx512-logic.ll
llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll
llvm/test/CodeGen/X86/avx512vl-logic.ll
llvm/test/CodeGen/X86/bitcast-vector-bool.ll
llvm/test/CodeGen/X86/combine-and.ll
llvm/test/CodeGen/X86/combine-sdiv.ll
llvm/test/CodeGen/X86/dpbusd_const.ll
llvm/test/CodeGen/X86/dpbusd_i4.ll
llvm/test/CodeGen/X86/gfni-funnel-shifts.ll
llvm/test/CodeGen/X86/gfni-rotates.ll
llvm/test/CodeGen/X86/gfni-shifts.ll
llvm/test/CodeGen/X86/horizontal-reduce-smax.ll
llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
llvm/test/CodeGen/X86/i64-to-float.ll
llvm/test/CodeGen/X86/icmp-pow2-diff.ll
llvm/test/CodeGen/X86/midpoint-int-vec-128.ll
llvm/test/CodeGen/X86/midpoint-int-vec-256.ll
llvm/test/CodeGen/X86/midpoint-int-vec-512.ll
llvm/test/CodeGen/X86/min-legal-vector-width.ll
llvm/test/CodeGen/X86/movmsk-cmp.ll
llvm/test/CodeGen/X86/opt-pipeline.ll
llvm/test/CodeGen/X86/paddus.ll
llvm/test/CodeGen/X86/prefer-avx256-lzcnt.ll
llvm/test/CodeGen/X86/prefer-avx256-mulo.ll
llvm/test/CodeGen/X86/prefer-avx256-shift.ll
llvm/test/CodeGen/X86/prefer-avx256-trunc.ll
llvm/test/CodeGen/X86/prefer-avx256-wide-mul.ll
llvm/test/CodeGen/X86/psubus.ll
llvm/test/CodeGen/X86/rotate-extract-vector.ll
llvm/test/CodeGen/X86/rotate_vec.ll
llvm/test/CodeGen/X86/sadd_sat_vec.ll
llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll
llvm/test/CodeGen/X86/ssub_sat_vec.ll
llvm/test/CodeGen/X86/usub_sat_vec.ll
llvm/test/CodeGen/X86/vec-strict-inttofp-128-fp16.ll
llvm/test/CodeGen/X86/vec-strict-inttofp-256-fp16.ll
llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
llvm/test/CodeGen/X86/vec-strict-inttofp-512-fp16.ll
llvm/test/CodeGen/X86/vector-fshl-128.ll
llvm/test/CodeGen/X86/vector-fshl-256.ll
llvm/test/CodeGen/X86/vector-fshl-512.ll
llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
llvm/test/CodeGen/X86/vector-fshr-128.ll
llvm/test/CodeGen/X86/vector-fshr-256.ll
llvm/test/CodeGen/X86/vector-fshr-512.ll
llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll
llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll
llvm/test/CodeGen/X86/vector-lzcnt-128.ll
llvm/test/CodeGen/X86/vector-lzcnt-256.ll
llvm/test/CodeGen/X86/vector-lzcnt-512.ll
llvm/test/CodeGen/X86/vector-mul.ll
llvm/test/CodeGen/X86/vector-pack-128.ll
llvm/test/CodeGen/X86/vector-pack-256.ll
llvm/test/CodeGen/X86/vector-pack-512.ll
llvm/test/CodeGen/X86/vector-pcmp.ll
llvm/test/CodeGen/X86/vector-reduce-add-mask.ll
llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
llvm/test/CodeGen/X86/vector-reduce-smax.ll
llvm/test/CodeGen/X86/vector-reduce-smin.ll
llvm/test/CodeGen/X86/vector-rotate-128.ll
llvm/test/CodeGen/X86/vector-rotate-256.ll
llvm/test/CodeGen/X86/vector-rotate-512.ll
llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
llvm/test/CodeGen/X86/vector-shift-shl-128.ll
llvm/test/CodeGen/X86/vector-shift-shl-256.ll
llvm/test/CodeGen/X86/vector-shift-shl-512.ll
llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll
llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
llvm/test/CodeGen/X86/vselect-pcmp.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt
index 9aa5fce16cb29..e79bed6409315 100644
--- a/llvm/lib/Target/X86/CMakeLists.txt
+++ b/llvm/lib/Target/X86/CMakeLists.txt
@@ -45,6 +45,7 @@ set(sources
X86FixupBWInsts.cpp
X86FixupLEAs.cpp
X86FixupInstTuning.cpp
+ X86FixupVectorConstants.cpp
X86AvoidStoreForwardingBlocks.cpp
X86DynAllocaExpander.cpp
X86FixupSetCC.cpp
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
index e15f71828da16..76ecc77bc39c2 100644
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -60,10 +60,13 @@ FunctionPass *createX86PadShortFunctions();
/// instructions, in order to eliminate execution delays in some processors.
FunctionPass *createX86FixupLEAs();
-/// Return as pass that replaces equivilent slower instructions with faster
+/// Return a pass that replaces equivalent slower instructions with faster
/// ones.
FunctionPass *createX86FixupInstTuning();
+/// Return a pass that reduces the size of vector constant pool loads.
+FunctionPass *createX86FixupVectorConstants();
+
/// Return a pass that removes redundant LEA instructions and redundant address
/// recalculations.
FunctionPass *createX86OptimizeLEAs();
@@ -171,6 +174,7 @@ void initializeFixupBWInstPassPass(PassRegistry &);
void initializeFixupLEAPassPass(PassRegistry &);
void initializeX86ArgumentStackSlotPassPass(PassRegistry &);
void initializeX86FixupInstTuningPassPass(PassRegistry &);
+void initializeX86FixupVectorConstantsPassPass(PassRegistry &);
void initializeWinEHStatePassPass(PassRegistry &);
void initializeX86AvoidSFBPassPass(PassRegistry &);
void initializeX86AvoidTrailingCallPassPass(PassRegistry &);
diff --git a/llvm/lib/Target/X86/X86FixupVectorConstants.cpp b/llvm/lib/Target/X86/X86FixupVectorConstants.cpp
new file mode 100644
index 0000000000000..3e683cb872531
--- /dev/null
+++ b/llvm/lib/Target/X86/X86FixupVectorConstants.cpp
@@ -0,0 +1,309 @@
+//===-- X86FixupVectorConstants.cpp - optimize constant generation -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file examines all full size vector constant pool loads and attempts to
+// replace them with smaller constant pool entries, including:
+// * Converting AVX512 memory-fold instructions to their broadcast-fold form
+// * TODO: Broadcasting of full width loads.
+// * TODO: Sign/Zero extension of full width loads.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrFoldTables.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-fixup-vector-constants"
+
+STATISTIC(NumInstChanges, "Number of instructions changes");
+
+namespace {
+class X86FixupVectorConstantsPass : public MachineFunctionPass {
+public:
+ static char ID;
+
+ X86FixupVectorConstantsPass() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override {
+ return "X86 Fixup Vector Constants";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ bool processInstruction(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineInstr &MI);
+
+ // This pass runs after regalloc and doesn't support VReg operands.
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+private:
+ const X86InstrInfo *TII = nullptr;
+ const X86Subtarget *ST = nullptr;
+ const MCSchedModel *SM = nullptr;
+};
+} // end anonymous namespace
+
+char X86FixupVectorConstantsPass::ID = 0;
+
+INITIALIZE_PASS(X86FixupVectorConstantsPass, DEBUG_TYPE, DEBUG_TYPE, false, false)
+
+FunctionPass *llvm::createX86FixupVectorConstants() {
+ return new X86FixupVectorConstantsPass();
+}
+
+static const Constant *getConstantFromPool(const MachineInstr &MI,
+ const MachineOperand &Op) {
+ if (!Op.isCPI() || Op.getOffset() != 0)
+ return nullptr;
+
+ ArrayRef<MachineConstantPoolEntry> Constants =
+ MI.getParent()->getParent()->getConstantPool()->getConstants();
+ const MachineConstantPoolEntry &ConstantEntry = Constants[Op.getIndex()];
+
+ // Bail if this is a machine constant pool entry, we won't be able to dig out
+ // anything useful.
+ if (ConstantEntry.isMachineConstantPoolEntry())
+ return nullptr;
+
+ return ConstantEntry.Val.ConstVal;
+}
+
+// Attempt to extract the full width of bits data from the constant.
+static std::optional<APInt> extractConstantBits(const Constant *C) {
+ unsigned NumBits = C->getType()->getPrimitiveSizeInBits();
+
+ if (auto *CInt = dyn_cast<ConstantInt>(C))
+ return CInt->getValue();
+
+ if (auto *CFP = dyn_cast<ConstantFP>(C))
+ return CFP->getValue().bitcastToAPInt();
+
+ if (auto *CV = dyn_cast<ConstantVector>(C)) {
+ if (auto *CVSplat = CV->getSplatValue(/*AllowUndefs*/ true)) {
+ if (std::optional<APInt> Bits = extractConstantBits(CVSplat)) {
+ assert((NumBits % Bits->getBitWidth()) == 0 && "Illegal splat");
+ return APInt::getSplat(NumBits, *Bits);
+ }
+ }
+ }
+
+ if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
+ bool IsInteger = CDS->getElementType()->isIntegerTy();
+ bool IsFloat = CDS->getElementType()->isHalfTy() ||
+ CDS->getElementType()->isBFloatTy() ||
+ CDS->getElementType()->isFloatTy() ||
+ CDS->getElementType()->isDoubleTy();
+ if (IsInteger || IsFloat) {
+ APInt Bits = APInt::getZero(NumBits);
+ unsigned EltBits = CDS->getElementType()->getPrimitiveSizeInBits();
+ for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
+ if (IsInteger)
+ Bits.insertBits(CDS->getElementAsAPInt(I), I * EltBits);
+ else
+ Bits.insertBits(CDS->getElementAsAPFloat(I).bitcastToAPInt(),
+ I * EltBits);
+ }
+ return Bits;
+ }
+ }
+
+ return std::nullopt;
+}
+
+// Attempt to compute the splat width of bits data by normalizing the splat to
+// remove undefs.
+static std::optional<APInt> getSplatableConstant(const Constant *C,
+ unsigned SplatBitWidth) {
+ const Type *Ty = C->getType();
+ assert((Ty->getPrimitiveSizeInBits() % SplatBitWidth) == 0 &&
+ "Illegal splat width");
+
+ if (std::optional<APInt> Bits = extractConstantBits(C))
+ if (Bits->isSplat(SplatBitWidth))
+ return Bits->trunc(SplatBitWidth);
+
+ // Detect general splats with undefs.
+ // TODO: Do we need to handle NumEltsBits > SplatBitWidth splitting?
+ if (auto *CV = dyn_cast<ConstantVector>(C)) {
+ unsigned NumOps = CV->getNumOperands();
+ unsigned NumEltsBits = Ty->getScalarSizeInBits();
+ unsigned NumScaleOps = SplatBitWidth / NumEltsBits;
+ if ((SplatBitWidth % NumEltsBits) == 0) {
+ // Collect the elements and ensure that within the repeated splat sequence
+ // they either match or are undef.
+ SmallVector<Constant *, 16> Sequence(NumScaleOps, nullptr);
+ for (unsigned Idx = 0; Idx != NumOps; ++Idx) {
+ if (Constant *Elt = CV->getAggregateElement(Idx)) {
+ if (isa<UndefValue>(Elt))
+ continue;
+ unsigned SplatIdx = Idx % NumScaleOps;
+ if (!Sequence[SplatIdx] || Sequence[SplatIdx] == Elt) {
+ Sequence[SplatIdx] = Elt;
+ continue;
+ }
+ }
+ return std::nullopt;
+ }
+ // Extract the constant bits forming the splat and insert into the bits
+ // data, leave undef as zero.
+ APInt SplatBits = APInt::getZero(SplatBitWidth);
+ for (unsigned I = 0; I != NumScaleOps; ++I) {
+ if (!Sequence[I])
+ continue;
+ if (std::optional<APInt> Bits = extractConstantBits(Sequence[I])) {
+ SplatBits.insertBits(*Bits, I * Bits->getBitWidth());
+ continue;
+ }
+ return std::nullopt;
+ }
+ return SplatBits;
+ }
+ }
+
+ return std::nullopt;
+}
+
+// Attempt to rebuild a normalized splat vector constant of the requested splat
+// width, built up of potentially smaller scalar values.
+// NOTE: We don't always bother converting to scalars if the vector length is 1.
+static Constant *rebuildSplatableConstant(const Constant *C,
+ unsigned SplatBitWidth) {
+ std::optional<APInt> Splat = getSplatableConstant(C, SplatBitWidth);
+ if (!Splat)
+ return nullptr;
+
+ // Determine scalar size to use for the constant splat vector, clamping as we
+ // might have found a splat smaller than the original constant data.
+ const Type *OriginalType = C->getType();
+ Type *SclTy = OriginalType->getScalarType();
+ unsigned NumSclBits = SclTy->getPrimitiveSizeInBits();
+ NumSclBits = std::min<unsigned>(NumSclBits, SplatBitWidth);
+
+ if (NumSclBits == 8) {
+ SmallVector<uint8_t> RawBits;
+ for (unsigned I = 0; I != SplatBitWidth; I += 8)
+ RawBits.push_back(Splat->extractBits(8, I).getZExtValue());
+ return ConstantDataVector::get(OriginalType->getContext(), RawBits);
+ }
+
+ if (NumSclBits == 16) {
+ SmallVector<uint16_t> RawBits;
+ for (unsigned I = 0; I != SplatBitWidth; I += 16)
+ RawBits.push_back(Splat->extractBits(16, I).getZExtValue());
+ if (SclTy->is16bitFPTy())
+ return ConstantDataVector::getFP(SclTy, RawBits);
+ return ConstantDataVector::get(OriginalType->getContext(), RawBits);
+ }
+
+ if (NumSclBits == 32) {
+ SmallVector<uint32_t> RawBits;
+ for (unsigned I = 0; I != SplatBitWidth; I += 32)
+ RawBits.push_back(Splat->extractBits(32, I).getZExtValue());
+ if (SclTy->isFloatTy())
+ return ConstantDataVector::getFP(SclTy, RawBits);
+ return ConstantDataVector::get(OriginalType->getContext(), RawBits);
+ }
+
+ // Fallback to i64 / double.
+ SmallVector<uint64_t> RawBits;
+ for (unsigned I = 0; I != SplatBitWidth; I += 64)
+ RawBits.push_back(Splat->extractBits(64, I).getZExtValue());
+ if (SclTy->isDoubleTy())
+ return ConstantDataVector::getFP(SclTy, RawBits);
+ return ConstantDataVector::get(OriginalType->getContext(), RawBits);
+}
+
+bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ MachineConstantPool *CP = MI.getParent()->getParent()->getConstantPool();
+
+ auto ConvertToBroadcast = [&](unsigned OpBcst256, unsigned OpBcst128,
+ unsigned OpBcst64, unsigned OpBcst32,
+ unsigned OpBcst16, unsigned OpBcst8,
+ unsigned OperandNo) {
+ assert(MI.getNumOperands() >= (OperandNo + X86::AddrNumOperands) &&
+ "Unexpected number of operands!");
+
+ MachineOperand &CstOp = MI.getOperand(OperandNo + X86::AddrDisp);
+ if (auto *C = getConstantFromPool(MI, CstOp)) {
+ // Attempt to detect a suitable splat from increasing splat widths.
+ std::pair<unsigned, unsigned> Broadcasts[] = {
+ {8, OpBcst8}, {16, OpBcst16}, {32, OpBcst32},
+ {64, OpBcst64}, {128, OpBcst128}, {256, OpBcst256},
+ };
+ for (auto [BitWidth, OpBcst] : Broadcasts) {
+ if (OpBcst) {
+ // Construct a suitable splat constant and adjust the MI to
+ // use the new constant pool entry.
+ if (Constant *NewCst = rebuildSplatableConstant(C, BitWidth)) {
+ unsigned NewCPI =
+ CP->getConstantPoolIndex(NewCst, Align(BitWidth / 8));
+ MI.setDesc(TII->get(OpBcst));
+ CstOp.setIndex(NewCPI);
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+ };
+
+ // Attempt to find a AVX512 mapping from a full width memory-fold instruction
+ // to a broadcast-fold instruction variant.
+ if ((MI.getDesc().TSFlags & X86II::EncodingMask) == X86II::EVEX) {
+ unsigned OpBcst32 = 0, OpBcst64 = 0;
+ unsigned OpNoBcst32 = 0, OpNoBcst64 = 0;
+ if (const X86MemoryFoldTableEntry *Mem2Bcst =
+ llvm::lookupBroadcastFoldTable(Opc, 32)) {
+ OpBcst32 = Mem2Bcst->DstOp;
+ OpNoBcst32 = Mem2Bcst->Flags & TB_INDEX_MASK;
+ }
+ if (const X86MemoryFoldTableEntry *Mem2Bcst =
+ llvm::lookupBroadcastFoldTable(Opc, 64)) {
+ OpBcst64 = Mem2Bcst->DstOp;
+ OpNoBcst64 = Mem2Bcst->Flags & TB_INDEX_MASK;
+ }
+ assert(((OpBcst32 == 0) || (OpBcst64 == 0) || (OpNoBcst32 == OpNoBcst64)) &&
+ "OperandNo mismatch");
+
+ if (OpBcst32 || OpBcst64) {
+ unsigned OpNo = OpBcst32 == 0 ? OpNoBcst64 : OpNoBcst32;
+ return ConvertToBroadcast(0, 0, OpBcst64, OpBcst32, 0, 0, OpNo);
+ }
+ }
+
+ return false;
+}
+
+bool X86FixupVectorConstantsPass::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "Start X86FixupVectorConstants\n";);
+ bool Changed = false;
+ ST = &MF.getSubtarget<X86Subtarget>();
+ TII = ST->getInstrInfo();
+ SM = &ST->getSchedModel();
+
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (processInstruction(MF, MBB, MI)) {
+ ++NumInstChanges;
+ Changed = true;
+ }
+ }
+ }
+ LLVM_DEBUG(dbgs() << "End X86FixupVectorConstants\n";);
+ return Changed;
+}
diff --git a/llvm/lib/Target/X86/X86InstrFoldTables.cpp b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
index 51b37fdc626e1..e1feca25469b8 100644
--- a/llvm/lib/Target/X86/X86InstrFoldTables.cpp
+++ b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
@@ -30,6 +30,18 @@ static const X86MemoryFoldTableEntry BroadcastFoldTable2[] = {
{ X86::VADDPSZ128rr, X86::VADDPSZ128rmb, TB_BCAST_SS },
{ X86::VADDPSZ256rr, X86::VADDPSZ256rmb, TB_BCAST_SS },
{ X86::VADDPSZrr, X86::VADDPSZrmb, TB_BCAST_SS },
+ { X86::VANDNPDZ128rr, X86::VANDNPDZ128rmb, TB_BCAST_SD },
+ { X86::VANDNPDZ256rr, X86::VANDNPDZ256rmb, TB_BCAST_SD },
+ { X86::VANDNPDZrr, X86::VANDNPDZrmb, TB_BCAST_SD },
+ { X86::VANDNPSZ128rr, X86::VANDNPSZ128rmb, TB_BCAST_SS },
+ { X86::VANDNPSZ256rr, X86::VANDNPSZ256rmb, TB_BCAST_SS },
+ { X86::VANDNPSZrr, X86::VANDNPSZrmb, TB_BCAST_SS },
+ { X86::VANDPDZ128rr, X86::VANDPDZ128rmb, TB_BCAST_SD },
+ { X86::VANDPDZ256rr, X86::VANDPDZ256rmb, TB_BCAST_SD },
+ { X86::VANDPDZrr, X86::VANDPDZrmb, TB_BCAST_SD },
+ { X86::VANDPSZ128rr, X86::VANDPSZ128rmb, TB_BCAST_SS },
+ { X86::VANDPSZ256rr, X86::VANDPSZ256rmb, TB_BCAST_SS },
+ { X86::VANDPSZrr, X86::VANDPSZrmb, TB_BCAST_SS },
{ X86::VCMPPDZ128rri, X86::VCMPPDZ128rmbi, TB_BCAST_SD },
{ X86::VCMPPDZ256rri, X86::VCMPPDZ256rmbi, TB_BCAST_SD },
{ X86::VCMPPDZrri, X86::VCMPPDZrmbi, TB_BCAST_SD },
@@ -72,6 +84,12 @@ static const X86MemoryFoldTableEntry BroadcastFoldTable2[] = {
{ X86::VMULPSZ128rr, X86::VMULPSZ128rmb, TB_BCAST_SS },
{ X86::VMULPSZ256rr, X86::VMULPSZ256rmb, TB_BCAST_SS },
{ X86::VMULPSZrr, X86::VMULPSZrmb, TB_BCAST_SS },
+ { X86::VORPDZ128rr, X86::VORPDZ128rmb, TB_BCAST_SD },
+ { X86::VORPDZ256rr, X86::VORPDZ256rmb, TB_BCAST_SD },
+ { X86::VORPDZrr, X86::VORPDZrmb, TB_BCAST_SD },
+ { X86::VORPSZ128rr, X86::VORPSZ128rmb, TB_BCAST_SS },
+ { X86::VORPSZ256rr, X86::VORPSZ256rmb, TB_BCAST_SS },
+ { X86::VORPSZrr, X86::VORPSZrmb, TB_BCAST_SS },
{ X86::VPADDDZ128rr, X86::VPADDDZ128rmb, TB_BCAST_D },
{ X86::VPADDDZ256rr, X86::VPADDDZ256rmb, TB_BCAST_D },
{ X86::VPADDDZrr, X86::VPADDDZrmb, TB_BCAST_D },
@@ -174,6 +192,12 @@ static const X86MemoryFoldTableEntry BroadcastFoldTable2[] = {
{ X86::VSUBPSZ128rr, X86::VSUBPSZ128rmb, TB_BCAST_SS },
{ X86::VSUBPSZ256rr, X86::VSUBPSZ256rmb, TB_BCAST_SS },
{ X86::VSUBPSZrr, X86::VSUBPSZrmb, TB_BCAST_SS },
+ { X86::VXORPDZ128rr, X86::VXORPDZ128rmb, TB_BCAST_SD },
+ { X86::VXORPDZ256rr, X86::VXORPDZ256rmb, TB_BCAST_SD },
+ { X86::VXORPDZrr, X86::VXORPDZrmb, TB_BCAST_SD },
+ { X86::VXORPSZ128rr, X86::VXORPSZ128rmb, TB_BCAST_SS },
+ { X86::VXORPSZ256rr, X86::VXORPSZ256rmb, TB_BCAST_SS },
+ { X86::VXORPSZrr, X86::VXORPSZrmb, TB_BCAST_SS },
};
static const X86MemoryFoldTableEntry BroadcastFoldTable3[] = {
@@ -293,6 +317,68 @@ static const X86MemoryFoldTableEntry BroadcastFoldTable3[] = {
{ X86::VPTERNLOGQZrri, X86::VPTERNLOGQZrmbi, TB_BCAST_Q },
};
+// Table to map instructions safe to broadcast using a different width from the
+// element width.
+static const X86MemoryFoldTableEntry BroadcastSizeFoldTable2[] = {
+ { X86::VANDNPDZ128rr, X86::VANDNPSZ128rmb, TB_BCAST_SS },
+ { X86::VANDNPDZ256rr, X86::VANDNPSZ256rmb, TB_BCAST_SS },
+ { X86::VANDNPDZrr, X86::VANDNPSZrmb, TB_BCAST_SS },
+ { X86::VANDNPSZ128rr, X86::VANDNPDZ128rmb, TB_BCAST_SD },
+ { X86::VANDNPSZ256rr, X86::VANDNPDZ256rmb, TB_BCAST_SD },
+ { X86::VANDNPSZrr, X86::VANDNPDZrmb, TB_BCAST_SD },
+ { X86::VANDPDZ128rr, X86::VANDPSZ128rmb, TB_BCAST_SS },
+ { X86::VANDPDZ256rr, X86::VANDPSZ256rmb, TB_BCAST_SS },
+ { X86::VANDPDZrr, X86::VANDPSZrmb, TB_BCAST_SS },
+ { X86::VANDPSZ128rr, X86::VANDPDZ128rmb, TB_BCAST_SD },
+ { X86::VANDPSZ256rr, X86::VANDPDZ256rmb, TB_BCAST_SD },
+ { X86::VANDPSZrr, X86::VANDPDZrmb, TB_BCAST_SD },
+ { X86::VORPDZ128rr, X86::VORPSZ128rmb, TB_BCAST_SS },
+ { X86::VORPDZ256rr, X86::VORPSZ256rmb, TB_BCAST_SS },
+ { X86::VORPDZrr, X86::VORPSZrmb, TB_BCAST_SS },
+ { X86::VORPSZ128rr, X86::VORPDZ128rmb, TB_BCAST_SD },
+ { X86::VORPSZ256rr, X86::VORPDZ256rmb, TB_BCAST_SD },
+ { X86::VORPSZrr, X86::VORPDZrmb, TB_BCAST_SD },
+ { X86::VPANDDZ128rr, X86::VPANDQZ128rmb, TB_BCAST_Q },
+ { X86::VPANDDZ256rr, X86::VPANDQZ256rmb, TB_BCAST_Q },
+ { X86::VPANDDZrr, X86::VPANDQZrmb, TB_BCAST_Q },
+ { X86::VPANDNDZ128rr, X86::VPANDNQZ128rmb, TB_BCAST_Q },
+ { X86::VPANDNDZ256rr, X86::VPANDNQZ256rmb, TB_BCAST_Q },
+ { X86::VPANDNDZrr, X86::VPANDNQZrmb, TB_BCAST_Q },
+ { X86::VPANDNQZ128rr, X86::VPANDNDZ128rmb, TB_BCAST_D },
+ { X86::VPANDNQZ256rr, X86::VPANDNDZ256rmb, TB_BCAST_D },
+ { X86::VPANDNQZrr, X86::VPANDNDZrmb, TB_BCAST_D },
+ { X86::VPANDQZ128rr, X86::VPANDDZ128rmb, TB_BCAST_D },
+ { X86::VPANDQZ256rr, X86::VPANDDZ256rmb, TB_BCAST_D },
+ { X86::VPANDQZrr, X86::VPANDDZrmb, TB_BCAST_D },
+ { X86::VPORDZ128rr, X86::VPORQZ128rmb, TB_BCAST_Q },
+ { X86::VPORDZ256rr, X86::VPORQZ256rmb, TB_BCAST_Q },
+ { X86::VPORDZrr, X86::VPORQZrmb, TB_BCAST_Q },
+ { X86::VPORQZ128rr, X86::VPORDZ128rmb, TB_BCAST_D },
+ { X86::VPORQZ256rr, X86::VPORDZ256rmb, TB_BCAST_D },
+ { X86::VPORQZrr, X86::VPORDZrmb, TB_BCAST_D },
+ { X86::VPXORDZ128rr, X86::VPXORQZ128rmb, TB_BCAST_Q },
+ { X86::VPXORDZ256rr, X86::VPXORQZ256rmb, TB_BCAST_Q },
+ { X86::VPXORDZrr, X86::VPXORQZrmb, TB_BCAST_Q },
+ { X86::VPXORQZ128rr, X86::VPXORDZ128rmb, TB_BCAST_D },
+ { X86::VPXORQZ256rr, X86::VPXORDZ256rmb, TB_BCAST_D },
+ { X86::VPXORQZrr, X86::VPXORDZrmb, TB_BCAST_D },
+ { X86::VXORPDZ128rr, X86::VXORPSZ128rmb, TB_BCAST_SS },
+ { X86::VXORPDZ256rr, X86::VXORPSZ256rmb, TB_BCAST_SS },
+ { X86::VXORPDZrr, X86::VXORPSZrmb, TB_BCAST_SS },
+ { X86::VXORPSZ128rr, X86::VXORPDZ128rmb, TB_BCAST_SD },
+ { X86::VXORPSZ256rr, X86::VXORPDZ256rmb, TB_BCAST_SD },
+ { X86::VXORPSZrr, X86::VXORPDZrmb, TB_BCAST_SD },
+};
+
+static const X86MemoryFoldTableEntry BroadcastSizeFoldTable3[] = {
+ { X86::VPTERNLOGDZ128rri, X86::VPTERNLOGQZ128rmbi, TB_BCAST_Q },
+ { X86::VPTERNLOGDZ256rri, X86::VPTERNLOGQZ256rmbi, TB_BCAST_Q },
+ { X86::VPTERNLOGDZrri, X86::VPTERNLOGQZrmbi, TB_BCAST_Q },
+ { X86::VPTERNLOGQZ128rri, X86::VPTERNLOGDZ128rmbi, TB_BCAST_D },
+ { X86::VPTERNLOGQZ256rri, X86::VPTERNLOGDZ256rmbi, TB_BCAST_D },
+ { X86::VPTERNLOGQZrri, X86::VPTERNLOGDZrmbi, TB_BCAST_D },
+};
+
static const X86MemoryFoldTableEntry *
lookupFoldTableImpl(ArrayRef<X86MemoryFoldTableEntry> Table, unsigned RegOp) {
#ifndef NDEBUG
@@ -339,6 +425,16 @@ lookupFoldTableImpl(ArrayRef<X86MemoryFoldTableEntry> Table, unsigned RegOp) {
std::end(BroadcastFoldTable3)) ==
std::end(BroadcastFoldTable3) &&
"BroadcastFoldTable3 is not sorted and unique!");
+ assert(llvm::is_sorted(BroadcastSizeFoldTable2) &&
+ std::adjacent_find(std::begin(BroadcastSizeFoldTable2),
+ std::end(BroadcastSizeFoldTable2)) ==
+ std::end(BroadcastSizeFoldTable2) &&
+ "BroadcastSizeFoldTable2 is not sorted and unique!");
+ assert(llvm::is_sorted(BroadcastSizeFoldTable3) &&
+ std::adjacent_find(std::begin(BroadcastSizeFoldTable3),
+ std::end(BroadcastSizeFoldTable3)) ==
+ std::end(BroadcastSizeFoldTable3) &&
+ "BroadcastSizeFoldTable3 is not sorted and unique!");
FoldTablesChecked.store(true, std::memory_order_relaxed);
}
#endif
@@ -444,3 +540,85 @@ llvm::lookupUnfoldTable(unsigned MemOp) {
return nullptr;
}
+namespace {
+
+// This class stores the memory -> broadcast folding tables. It is instantiated
+// as a function scope static variable to lazily init the folding table.
+struct X86MemBroadcastFoldTable {
+ // Stores memory broadcast folding tables entries sorted by opcode.
+ std::vector<X86MemoryFoldTableEntry> Table;
+
+ X86MemBroadcastFoldTable() {
+ // Broadcast tables.
+ for (const X86MemoryFoldTableEntry &Reg2Bcst : BroadcastFoldTable2) {
+ unsigned RegOp = Reg2Bcst.KeyOp;
+ unsigned BcstOp = Reg2Bcst.DstOp;
+ if (const X86MemoryFoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 2)) {
+ unsigned MemOp = Reg2Mem->DstOp;
+ uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_2 |
+ TB_FOLDED_LOAD | TB_FOLDED_BCAST;
+ Table.push_back({MemOp, BcstOp, Flags});
+ }
+ }
+ for (const X86MemoryFoldTableEntry &Reg2Bcst : BroadcastSizeFoldTable2) {
+ unsigned RegOp = Reg2Bcst.KeyOp;
+ unsigned BcstOp = Reg2Bcst.DstOp;
+ if (const X86MemoryFoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 2)) {
+ unsigned MemOp = Reg2Mem->DstOp;
+ uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_2 |
+ TB_FOLDED_LOAD | TB_FOLDED_BCAST;
+ Table.push_back({MemOp, BcstOp, Flags});
+ }
+ }
+
+ for (const X86MemoryFoldTableEntry &Reg2Bcst : BroadcastFoldTable3) {
+ unsigned RegOp = Reg2Bcst.KeyOp;
+ unsigned BcstOp = Reg2Bcst.DstOp;
+ if (const X86MemoryFoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 3)) {
+ unsigned MemOp = Reg2Mem->DstOp;
+ uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_3 |
+ TB_FOLDED_LOAD | TB_FOLDED_BCAST;
+ Table.push_back({MemOp, BcstOp, Flags});
+ }
+ }
+ for (const X86MemoryFoldTableEntry &Reg2Bcst : BroadcastSizeFoldTable3) {
+ unsigned RegOp = Reg2Bcst.KeyOp;
+ unsigned BcstOp = Reg2Bcst.DstOp;
+ if (const X86MemoryFoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 3)) {
+ unsigned MemOp = Reg2Mem->DstOp;
+ uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_3 |
+ TB_FOLDED_LOAD | TB_FOLDED_BCAST;
+ Table.push_back({MemOp, BcstOp, Flags});
+ }
+ }
+
+ // Sort the memory->broadcast fold table.
+ array_pod_sort(Table.begin(), Table.end());
+ }
+};
+} // namespace
+
+static bool matchBroadcastSize(const X86MemoryFoldTableEntry &Entry,
+ unsigned BroadcastBits) {
+ switch (Entry.Flags & TB_BCAST_MASK) {
+ case TB_BCAST_SD:
+ case TB_BCAST_Q:
+ return BroadcastBits == 64;
+ case TB_BCAST_SS:
+ case TB_BCAST_D:
+ return BroadcastBits == 32;
+ }
+ return false;
+}
+
+const X86MemoryFoldTableEntry *
+llvm::lookupBroadcastFoldTable(unsigned MemOp, unsigned BroadcastBits) {
+ static X86MemBroadcastFoldTable MemBroadcastFoldTable;
+ auto &Table = MemBroadcastFoldTable.Table;
+ for (auto I = llvm::lower_bound(Table, MemOp);
+ I != Table.end() && I->KeyOp == MemOp; ++I) {
+ if (matchBroadcastSize(*I, BroadcastBits))
+ return &*I;
+ }
+ return nullptr;
+}
diff --git a/llvm/lib/Target/X86/X86InstrFoldTables.h b/llvm/lib/Target/X86/X86InstrFoldTables.h
index e1458e383fa78..28db61d9a3f83 100644
--- a/llvm/lib/Target/X86/X86InstrFoldTables.h
+++ b/llvm/lib/Target/X86/X86InstrFoldTables.h
@@ -47,6 +47,11 @@ const X86MemoryFoldTableEntry *lookupFoldTable(unsigned RegOp, unsigned OpNum);
// Look up the memory unfolding table entry for this instruction.
const X86MemoryFoldTableEntry *lookupUnfoldTable(unsigned MemOp);
+// Look up the broadcast memory folding table entry for this instruction from
+// the regular memory instruction.
+const X86MemoryFoldTableEntry *lookupBroadcastFoldTable(unsigned MemOp,
+ unsigned BroadcastBits);
+
} // namespace llvm
#endif
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 1afc596345322..c096e6dd9686c 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -574,6 +574,7 @@ void X86PassConfig::addPreEmitPass() {
addPass(createX86PadShortFunctions());
addPass(createX86FixupLEAs());
addPass(createX86FixupInstTuning());
+ addPass(createX86FixupVectorConstants());
}
addPass(createX86EvexToVexInsts());
addPass(createX86DiscriminateMemOpsPass());
diff --git a/llvm/test/CodeGen/X86/avx512-calling-conv.ll b/llvm/test/CodeGen/X86/avx512-calling-conv.ll
index 1b5b12308828f..9411aad9a21e4 100644
--- a/llvm/test/CodeGen/X86/avx512-calling-conv.ll
+++ b/llvm/test/CodeGen/X86/avx512-calling-conv.ll
@@ -286,7 +286,7 @@ define <8 x i1> @test7a(<8 x i32>%a, <8 x i32>%b) {
; SKX-NEXT: vpmovm2w %k0, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: callq _func8xi1
-; SKX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; SKX-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; SKX-NEXT: popq %rax
; SKX-NEXT: retq
;
@@ -310,7 +310,7 @@ define <8 x i1> @test7a(<8 x i32>%a, <8 x i32>%b) {
; FASTISEL-NEXT: vpmovm2w %k0, %xmm0
; FASTISEL-NEXT: vzeroupper
; FASTISEL-NEXT: callq _func8xi1
-; FASTISEL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; FASTISEL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; FASTISEL-NEXT: popq %rax
; FASTISEL-NEXT: retq
%cmpRes = icmp sgt <8 x i32>%a, %b
diff --git a/llvm/test/CodeGen/X86/avx512-ext.ll b/llvm/test/CodeGen/X86/avx512-ext.ll
index 6fb8ba9f0ea27..906d7cb6854f4 100644
--- a/llvm/test/CodeGen/X86/avx512-ext.ll
+++ b/llvm/test/CodeGen/X86/avx512-ext.ll
@@ -2895,7 +2895,7 @@ define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 {
; KNL-NEXT: vpcmpeqb %ymm2, %ymm3, %ymm2
; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; KNL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_64xi1_to_64xi8:
@@ -2911,7 +2911,7 @@ define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 {
; AVX512DQNOBW-NEXT: vpcmpeqb %ymm2, %ymm3, %ymm2
; AVX512DQNOBW-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512DQNOBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512DQNOBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512DQNOBW-NEXT: retq
%mask = icmp eq <64 x i8> %x, %y
%1 = zext <64 x i1> %mask to <64 x i8>
@@ -2926,7 +2926,7 @@ define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 {
; KNL-NEXT: vpcmpeqw %ymm2, %ymm3, %ymm2
; KNL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; KNL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_32xi1_to_32xi16:
@@ -2943,7 +2943,7 @@ define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 {
; AVX512DQNOBW-NEXT: vpcmpeqw %ymm2, %ymm3, %ymm2
; AVX512DQNOBW-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512DQNOBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512DQNOBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512DQNOBW-NEXT: retq
%mask = icmp eq <32 x i16> %x, %y
%1 = zext <32 x i1> %mask to <32 x i16>
@@ -2994,7 +2994,7 @@ define <32 x i8> @zext_32xi1_to_32xi8(<32 x i16> %x, <32 x i16> %y) #0 {
; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512DQNOBW-NEXT: vpmovdb %zmm1, %xmm1
; AVX512DQNOBW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512DQNOBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512DQNOBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: retq
%mask = icmp eq <32 x i16> %x, %y
%1 = zext <32 x i1> %mask to <32 x i8>
diff --git a/llvm/test/CodeGen/X86/avx512-logic.ll b/llvm/test/CodeGen/X86/avx512-logic.ll
index 0b76dc0b2f3bb..e53e194ba05c2 100644
--- a/llvm/test/CodeGen/X86/avx512-logic.ll
+++ b/llvm/test/CodeGen/X86/avx512-logic.ll
@@ -889,7 +889,7 @@ define <16 x i32> @ternlog_xor_andn(<16 x i32> %x, <16 x i32> %y, <16 x i32> %z)
define <16 x i32> @ternlog_or_and_mask(<16 x i32> %x, <16 x i32> %y) {
; ALL-LABEL: ternlog_or_and_mask:
; ALL: ## %bb.0:
-; ALL-NEXT: vpternlogd $236, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
+; ALL-NEXT: vpternlogd $236, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
; ALL-NEXT: retq
%a = and <16 x i32> %x, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
%b = or <16 x i32> %a, %y
@@ -899,7 +899,7 @@ define <16 x i32> @ternlog_or_and_mask(<16 x i32> %x, <16 x i32> %y) {
define <8 x i64> @ternlog_xor_and_mask(<8 x i64> %x, <8 x i64> %y) {
; ALL-LABEL: ternlog_xor_and_mask:
; ALL: ## %bb.0:
-; ALL-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
+; ALL-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
; ALL-NEXT: retq
%a = and <8 x i64> %x, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
%b = xor <8 x i64> %a, %y
@@ -909,7 +909,7 @@ define <8 x i64> @ternlog_xor_and_mask(<8 x i64> %x, <8 x i64> %y) {
define <16 x i32> @ternlog_maskz_or_and_mask(<16 x i32> %x, <16 x i32> %y, <16 x i32> %mask) {
; ALL-LABEL: ternlog_maskz_or_and_mask:
; ALL: ## %bb.0:
-; ALL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm3
+; ALL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm3
; ALL-NEXT: vpsrad $31, %zmm2, %zmm0
; ALL-NEXT: vpternlogd $224, %zmm1, %zmm3, %zmm0
; ALL-NEXT: retq
@@ -923,7 +923,7 @@ define <16 x i32> @ternlog_maskz_or_and_mask(<16 x i32> %x, <16 x i32> %y, <16 x
define <8 x i64> @ternlog_maskz_xor_and_mask(<8 x i64> %x, <8 x i64> %y, <8 x i64> %mask) {
; ALL-LABEL: ternlog_maskz_xor_and_mask:
; ALL: ## %bb.0:
-; ALL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm3
+; ALL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm3
; ALL-NEXT: vpsraq $63, %zmm2, %zmm0
; ALL-NEXT: vpternlogq $96, %zmm1, %zmm3, %zmm0
; ALL-NEXT: retq
@@ -939,14 +939,14 @@ define <16 x i32> @ternlog_maskx_or_and_mask(<16 x i32> %x, <16 x i32> %y, <16 x
; KNL: ## %bb.0:
; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; KNL-NEXT: vpcmpgtd %zmm2, %zmm3, %k1
-; KNL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm2
+; KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm2
; KNL-NEXT: vpord %zmm1, %zmm2, %zmm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: ternlog_maskx_or_and_mask:
; SKX: ## %bb.0:
; SKX-NEXT: vpmovd2m %zmm2, %k1
-; SKX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm2
+; SKX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm2
; SKX-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1}
; SKX-NEXT: retq
%m = icmp slt <16 x i32> %mask, zeroinitializer
@@ -961,7 +961,7 @@ define <16 x i32> @ternlog_masky_or_and_mask(<16 x i32> %x, <16 x i32> %y, <16 x
; KNL: ## %bb.0:
; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; KNL-NEXT: vpcmpgtd %zmm2, %zmm3, %k1
-; KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; KNL-NEXT: vpord %zmm1, %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovdqa64 %zmm1, %zmm0
; KNL-NEXT: retq
@@ -969,7 +969,7 @@ define <16 x i32> @ternlog_masky_or_and_mask(<16 x i32> %x, <16 x i32> %y, <16 x
; SKX-LABEL: ternlog_masky_or_and_mask:
; SKX: ## %bb.0:
; SKX-NEXT: vpmovd2m %zmm2, %k1
-; SKX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; SKX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; SKX-NEXT: vorps %zmm1, %zmm0, %zmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
@@ -985,14 +985,14 @@ define <8 x i64> @ternlog_maskx_xor_and_mask(<8 x i64> %x, <8 x i64> %y, <8 x i6
; KNL: ## %bb.0:
; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; KNL-NEXT: vpcmpgtq %zmm2, %zmm3, %k1
-; KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm2
+; KNL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm2
; KNL-NEXT: vpxorq %zmm1, %zmm2, %zmm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: ternlog_maskx_xor_and_mask:
; SKX: ## %bb.0:
; SKX-NEXT: vpmovq2m %zmm2, %k1
-; SKX-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm2
+; SKX-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm2
; SKX-NEXT: vxorpd %zmm1, %zmm2, %zmm0 {%k1}
; SKX-NEXT: retq
%m = icmp slt <8 x i64> %mask, zeroinitializer
@@ -1007,7 +1007,7 @@ define <8 x i64> @ternlog_masky_xor_and_mask(<8 x i64> %x, <8 x i64> %y, <8 x i6
; KNL: ## %bb.0:
; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; KNL-NEXT: vpcmpgtq %zmm2, %zmm3, %k1
-; KNL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; KNL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; KNL-NEXT: vpxorq %zmm1, %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovdqa64 %zmm1, %zmm0
; KNL-NEXT: retq
@@ -1015,7 +1015,7 @@ define <8 x i64> @ternlog_masky_xor_and_mask(<8 x i64> %x, <8 x i64> %y, <8 x i6
; SKX-LABEL: ternlog_masky_xor_and_mask:
; SKX: ## %bb.0:
; SKX-NEXT: vpmovq2m %zmm2, %k1
-; SKX-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; SKX-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; SKX-NEXT: vxorpd %zmm1, %zmm0, %zmm1 {%k1}
; SKX-NEXT: vmovapd %zmm1, %zmm0
; SKX-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll
index b1bedcf157eed..f3f4223b6e877 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll
@@ -740,7 +740,7 @@ define <2 x half> @test_u1tofp2(<2 x i1> %arg0) {
; CHECK-LABEL: test_u1tofp2:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovqw %xmm0, %xmm0
-; CHECK-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-NEXT: vcvtuw2ph %xmm0, %xmm0
; CHECK-NEXT: retq
%res = uitofp <2 x i1> %arg0 to <2 x half>
diff --git a/llvm/test/CodeGen/X86/avx512vl-logic.ll b/llvm/test/CodeGen/X86/avx512vl-logic.ll
index 7da3cf984175b..58621967e2aca 100644
--- a/llvm/test/CodeGen/X86/avx512vl-logic.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-logic.ll
@@ -1039,7 +1039,7 @@ define <4 x i32> @ternlog_xor_andn(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
define <4 x i32> @ternlog_or_and_mask(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: ternlog_or_and_mask:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vpternlogd $236, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; CHECK-NEXT: vpternlogd $236, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0
; CHECK-NEXT: retq
%a = and <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
%b = or <4 x i32> %a, %y
@@ -1049,7 +1049,7 @@ define <4 x i32> @ternlog_or_and_mask(<4 x i32> %x, <4 x i32> %y) {
define <8 x i32> @ternlog_or_and_mask_ymm(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: ternlog_or_and_mask_ymm:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vpternlogd $236, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
+; CHECK-NEXT: vpternlogd $236, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm0
; CHECK-NEXT: retq
%a = and <8 x i32> %x, <i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216>
%b = or <8 x i32> %a, %y
@@ -1059,7 +1059,7 @@ define <8 x i32> @ternlog_or_and_mask_ymm(<8 x i32> %x, <8 x i32> %y) {
define <2 x i64> @ternlog_xor_and_mask(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: ternlog_xor_and_mask:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; CHECK-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0
; CHECK-NEXT: retq
%a = and <2 x i64> %x, <i64 1099511627775, i64 1099511627775>
%b = xor <2 x i64> %a, %y
@@ -1069,7 +1069,7 @@ define <2 x i64> @ternlog_xor_and_mask(<2 x i64> %x, <2 x i64> %y) {
define <4 x i64> @ternlog_xor_and_mask_ymm(<4 x i64> %x, <4 x i64> %y) {
; CHECK-LABEL: ternlog_xor_and_mask_ymm:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
+; CHECK-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
; CHECK-NEXT: retq
%a = and <4 x i64> %x, <i64 72057594037927935, i64 72057594037927935, i64 72057594037927935, i64 72057594037927935>
%b = xor <4 x i64> %a, %y
@@ -1079,7 +1079,7 @@ define <4 x i64> @ternlog_xor_and_mask_ymm(<4 x i64> %x, <4 x i64> %y) {
define <4 x i32> @ternlog_maskz_or_and_mask(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %mask) {
; CHECK-LABEL: ternlog_maskz_or_and_mask:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
+; CHECK-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm2
; CHECK-NEXT: vpsrad $31, %xmm3, %xmm0
; CHECK-NEXT: vpternlogd $224, %xmm1, %xmm2, %xmm0
; CHECK-NEXT: retq
@@ -1093,7 +1093,7 @@ define <4 x i32> @ternlog_maskz_or_and_mask(<4 x i32> %x, <4 x i32> %y, <4 x i32
define <8 x i32> @ternlog_maskz_or_and_mask_ymm(<8 x i32> %x, <8 x i32> %y, <8 x i32> %mask) {
; CHECK-LABEL: ternlog_maskz_or_and_mask_ymm:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm3
+; CHECK-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm3
; CHECK-NEXT: vpsrad $31, %ymm2, %ymm0
; CHECK-NEXT: vpternlogd $224, %ymm1, %ymm3, %ymm0
; CHECK-NEXT: retq
@@ -1107,7 +1107,7 @@ define <8 x i32> @ternlog_maskz_or_and_mask_ymm(<8 x i32> %x, <8 x i32> %y, <8 x
define <2 x i64> @ternlog_maskz_xor_and_mask(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) {
; CHECK-LABEL: ternlog_maskz_xor_and_mask:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
+; CHECK-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm3
; CHECK-NEXT: vpsraq $63, %xmm2, %xmm0
; CHECK-NEXT: vpternlogq $96, %xmm1, %xmm3, %xmm0
; CHECK-NEXT: retq
@@ -1121,7 +1121,7 @@ define <2 x i64> @ternlog_maskz_xor_and_mask(<2 x i64> %x, <2 x i64> %y, <2 x i6
define <4 x i64> @ternlog_maskz_xor_and_mask_ymm(<4 x i64> %x, <4 x i64> %y, <4 x i64> %mask) {
; CHECK-LABEL: ternlog_maskz_xor_and_mask_ymm:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm3
+; CHECK-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm3
; CHECK-NEXT: vpsraq $63, %ymm2, %ymm0
; CHECK-NEXT: vpternlogq $96, %ymm1, %ymm3, %ymm0
; CHECK-NEXT: retq
@@ -1137,14 +1137,14 @@ define <4 x i32> @ternlog_maskx_or_and_mask(<4 x i32> %x, <4 x i32> %y, <4 x i32
; KNL: ## %bb.0:
; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; KNL-NEXT: vpcmpgtd %xmm3, %xmm2, %k1
-; KNL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
+; KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm2
; KNL-NEXT: vpord %xmm1, %xmm2, %xmm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: ternlog_maskx_or_and_mask:
; SKX: ## %bb.0:
; SKX-NEXT: vpmovd2m %xmm3, %k1
-; SKX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
+; SKX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm2
; SKX-NEXT: vorps %xmm1, %xmm2, %xmm0 {%k1}
; SKX-NEXT: retq
%m = icmp slt <4 x i32> %mask, zeroinitializer
@@ -1159,14 +1159,14 @@ define <8 x i32> @ternlog_maskx_or_and_mask_ymm(<8 x i32> %x, <8 x i32> %y, <8 x
; KNL: ## %bb.0:
; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; KNL-NEXT: vpcmpgtd %ymm2, %ymm3, %k1
-; KNL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2
+; KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm2
; KNL-NEXT: vpord %ymm1, %ymm2, %ymm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: ternlog_maskx_or_and_mask_ymm:
; SKX: ## %bb.0:
; SKX-NEXT: vpmovd2m %ymm2, %k1
-; SKX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2
+; SKX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm2
; SKX-NEXT: vorps %ymm1, %ymm2, %ymm0 {%k1}
; SKX-NEXT: retq
%m = icmp slt <8 x i32> %mask, zeroinitializer
@@ -1181,14 +1181,14 @@ define <2 x i64> @ternlog_maskx_xor_and_mask(<2 x i64> %x, <2 x i64> %y, <2 x i6
; KNL: ## %bb.0:
; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; KNL-NEXT: vpcmpgtq %xmm2, %xmm3, %k1
-; KNL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
+; KNL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm2
; KNL-NEXT: vpxorq %xmm1, %xmm2, %xmm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: ternlog_maskx_xor_and_mask:
; SKX: ## %bb.0:
; SKX-NEXT: vpmovq2m %xmm2, %k1
-; SKX-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
+; SKX-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm2
; SKX-NEXT: vxorpd %xmm1, %xmm2, %xmm0 {%k1}
; SKX-NEXT: retq
%m = icmp slt <2 x i64> %mask, zeroinitializer
@@ -1203,14 +1203,14 @@ define <4 x i64> @ternlog_maskx_xor_and_mask_ymm(<4 x i64> %x, <4 x i64> %y, <4
; KNL: ## %bb.0:
; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; KNL-NEXT: vpcmpgtq %ymm2, %ymm3, %k1
-; KNL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2
+; KNL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm2
; KNL-NEXT: vpxorq %ymm1, %ymm2, %ymm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: ternlog_maskx_xor_and_mask_ymm:
; SKX: ## %bb.0:
; SKX-NEXT: vpmovq2m %ymm2, %k1
-; SKX-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2
+; SKX-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm2
; SKX-NEXT: vxorpd %ymm1, %ymm2, %ymm0 {%k1}
; SKX-NEXT: retq
%m = icmp slt <4 x i64> %mask, zeroinitializer
@@ -1225,7 +1225,7 @@ define <4 x i32> @ternlog_masky_or_and_mask(<4 x i32> %x, <4 x i32> %y, <4 x i32
; KNL: ## %bb.0:
; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; KNL-NEXT: vpcmpgtd %xmm3, %xmm2, %k1
-; KNL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; KNL-NEXT: vpord %xmm1, %xmm0, %xmm1 {%k1}
; KNL-NEXT: vmovdqa %xmm1, %xmm0
; KNL-NEXT: retq
@@ -1233,7 +1233,7 @@ define <4 x i32> @ternlog_masky_or_and_mask(<4 x i32> %x, <4 x i32> %y, <4 x i32
; SKX-LABEL: ternlog_masky_or_and_mask:
; SKX: ## %bb.0:
; SKX-NEXT: vpmovd2m %xmm3, %k1
-; SKX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; SKX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; SKX-NEXT: vorps %xmm1, %xmm0, %xmm1 {%k1}
; SKX-NEXT: vmovaps %xmm1, %xmm0
; SKX-NEXT: retq
@@ -1249,14 +1249,14 @@ define <8 x i32> @ternlog_masky_or_and_mask_ymm(<8 x i32> %x, <8 x i32> %y, <8 x
; KNL: ## %bb.0:
; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; KNL-NEXT: vpcmpgtd %ymm2, %ymm3, %k1
-; KNL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2
+; KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm2
; KNL-NEXT: vpord %ymm1, %ymm2, %ymm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: ternlog_masky_or_and_mask_ymm:
; SKX: ## %bb.0:
; SKX-NEXT: vpmovd2m %ymm2, %k1
-; SKX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2
+; SKX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm2
; SKX-NEXT: vorps %ymm1, %ymm2, %ymm0 {%k1}
; SKX-NEXT: retq
%m = icmp slt <8 x i32> %mask, zeroinitializer
@@ -1271,7 +1271,7 @@ define <2 x i64> @ternlog_masky_xor_and_mask(<2 x i64> %x, <2 x i64> %y, <2 x i6
; KNL: ## %bb.0:
; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; KNL-NEXT: vpcmpgtq %xmm2, %xmm3, %k1
-; KNL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; KNL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; KNL-NEXT: vpxorq %xmm1, %xmm0, %xmm1 {%k1}
; KNL-NEXT: vmovdqa %xmm1, %xmm0
; KNL-NEXT: retq
@@ -1279,7 +1279,7 @@ define <2 x i64> @ternlog_masky_xor_and_mask(<2 x i64> %x, <2 x i64> %y, <2 x i6
; SKX-LABEL: ternlog_masky_xor_and_mask:
; SKX: ## %bb.0:
; SKX-NEXT: vpmovq2m %xmm2, %k1
-; SKX-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; SKX-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; SKX-NEXT: vxorpd %xmm1, %xmm0, %xmm1 {%k1}
; SKX-NEXT: vmovapd %xmm1, %xmm0
; SKX-NEXT: retq
@@ -1295,7 +1295,7 @@ define <4 x i64> @ternlog_masky_xor_and_mask_ymm(<4 x i64> %x, <4 x i64> %y, <4
; KNL: ## %bb.0:
; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; KNL-NEXT: vpcmpgtq %ymm2, %ymm3, %k1
-; KNL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; KNL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
; KNL-NEXT: vpxorq %ymm1, %ymm0, %ymm1 {%k1}
; KNL-NEXT: vmovdqa %ymm1, %ymm0
; KNL-NEXT: retq
@@ -1303,7 +1303,7 @@ define <4 x i64> @ternlog_masky_xor_and_mask_ymm(<4 x i64> %x, <4 x i64> %y, <4
; SKX-LABEL: ternlog_masky_xor_and_mask_ymm:
; SKX: ## %bb.0:
; SKX-NEXT: vpmovq2m %ymm2, %k1
-; SKX-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; SKX-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
; SKX-NEXT: vxorpd %ymm1, %ymm0, %ymm1 {%k1}
; SKX-NEXT: vmovapd %ymm1, %ymm0
; SKX-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
index f903e3696042f..c092ed4f9f668 100644
--- a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
+++ b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
@@ -1086,7 +1086,7 @@ define i1 @trunc_v64i8_cmp(<64 x i8> %a0) nounwind {
;
; AVX512-LABEL: trunc_v64i8_cmp:
; AVX512: # %bb.0:
-; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: setne %al
; AVX512-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/combine-and.ll b/llvm/test/CodeGen/X86/combine-and.ll
index d821070c13e0f..43c85fdc703bf 100644
--- a/llvm/test/CodeGen/X86/combine-and.ll
+++ b/llvm/test/CodeGen/X86/combine-and.ll
@@ -546,12 +546,26 @@ define <16 x i8> @PR34620(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-NEXT: paddb %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: PR34620:
-; AVX: # %bb.0:
-; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: PR34620:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm0
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: PR34620:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpsrlw $1, %xmm0, %xmm0
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: PR34620:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsrlw $1, %xmm0, %xmm0
+; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
%1 = lshr <16 x i8> %a0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%2 = and <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%3 = add <16 x i8> %2, %a1
diff --git a/llvm/test/CodeGen/X86/combine-sdiv.ll b/llvm/test/CodeGen/X86/combine-sdiv.ll
index 80455eabe9f69..0f5f28a857940 100644
--- a/llvm/test/CodeGen/X86/combine-sdiv.ll
+++ b/llvm/test/CodeGen/X86/combine-sdiv.ll
@@ -249,11 +249,23 @@ define <4 x i32> @combine_vec_sdiv_by_pos1(<4 x i32> %x) {
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT: retq
;
-; AVX2ORLATER-LABEL: combine_vec_sdiv_by_pos1:
-; AVX2ORLATER: # %bb.0:
-; AVX2ORLATER-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2ORLATER-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2ORLATER-NEXT: retq
+; AVX2-LABEL: combine_vec_sdiv_by_pos1:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: combine_vec_sdiv_by_pos1:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512F-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: combine_vec_sdiv_by_pos1:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512BW-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: retq
;
; XOP-LABEL: combine_vec_sdiv_by_pos1:
; XOP: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/dpbusd_const.ll b/llvm/test/CodeGen/X86/dpbusd_const.ll
index b0ffb23c9ced3..7ccbcd4d0d699 100644
--- a/llvm/test/CodeGen/X86/dpbusd_const.ll
+++ b/llvm/test/CodeGen/X86/dpbusd_const.ll
@@ -89,7 +89,7 @@ define i32 @mul_4xi4_cz(<4 x i4> %a, i32 %c) {
; AVX512VLVNNI-LABEL: mul_4xi4_cz:
; AVX512VLVNNI: # %bb.0: # %entry
; AVX512VLVNNI-NEXT: vpmovdb %xmm0, %xmm0
-; AVX512VLVNNI-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VLVNNI-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VLVNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VLVNNI-NEXT: vpdpbusd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX512VLVNNI-NEXT: vmovd %xmm1, %eax
diff --git a/llvm/test/CodeGen/X86/dpbusd_i4.ll b/llvm/test/CodeGen/X86/dpbusd_i4.ll
index 1c045344dcd0a..2e25b5a0e1c03 100644
--- a/llvm/test/CodeGen/X86/dpbusd_i4.ll
+++ b/llvm/test/CodeGen/X86/dpbusd_i4.ll
@@ -29,7 +29,7 @@ entry:
define i32 @mul_i4i8(<16 x i4> %a, <16 x i8> %b, i32 %c) {
; CHECK-LABEL: mul_i4i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpdpbusd %xmm1, %xmm0, %xmm2
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
@@ -54,9 +54,9 @@ define i32 @mul_i4i4(<16 x i4> %a, <16 x i4> %b, i32 %c) {
; CHECK-NEXT: vpsllw $4, %xmm1, %xmm1
; CHECK-NEXT: vpsrlw $4, %xmm1, %xmm1
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; CHECK-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm1
+; CHECK-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm2, %xmm1
; CHECK-NEXT: vpsubb %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpdpbusd %xmm1, %xmm0, %xmm2
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
diff --git a/llvm/test/CodeGen/X86/gfni-funnel-shifts.ll b/llvm/test/CodeGen/X86/gfni-funnel-shifts.ll
index 1f5236311b463..74eb3a56ef672 100644
--- a/llvm/test/CodeGen/X86/gfni-funnel-shifts.ll
+++ b/llvm/test/CodeGen/X86/gfni-funnel-shifts.ll
@@ -59,7 +59,7 @@ define <16 x i8> @splatconstant_fshr_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind
; GFNIAVX512: # %bb.0:
; GFNIAVX512-NEXT: vpsrlw $7, %xmm1, %xmm1
; GFNIAVX512-NEXT: vpaddb %xmm0, %xmm0, %xmm0
-; GFNIAVX512-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; GFNIAVX512-NEXT: vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0
; GFNIAVX512-NEXT: retq
%res = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>)
ret <16 x i8> %res
@@ -252,7 +252,7 @@ define <64 x i8> @splatconstant_fshl_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind
; GFNIAVX512: # %bb.0:
; GFNIAVX512-NEXT: vpsrlw $7, %zmm1, %zmm1
; GFNIAVX512-NEXT: vpaddb %zmm0, %zmm0, %zmm0
-; GFNIAVX512-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
+; GFNIAVX512-NEXT: vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
; GFNIAVX512-NEXT: retq
%res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
ret <64 x i8> %res
diff --git a/llvm/test/CodeGen/X86/gfni-rotates.ll b/llvm/test/CodeGen/X86/gfni-rotates.ll
index 44581cb97e033..7b79b02751164 100644
--- a/llvm/test/CodeGen/X86/gfni-rotates.ll
+++ b/llvm/test/CodeGen/X86/gfni-rotates.ll
@@ -61,7 +61,7 @@ define <16 x i8> @splatconstant_rotr_v16i8(<16 x i8> %a) nounwind {
; GFNIAVX512: # %bb.0:
; GFNIAVX512-NEXT: vpsrlw $7, %xmm0, %xmm1
; GFNIAVX512-NEXT: vpaddb %xmm0, %xmm0, %xmm0
-; GFNIAVX512-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; GFNIAVX512-NEXT: vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0
; GFNIAVX512-NEXT: retq
%res = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>)
ret <16 x i8> %res
@@ -256,7 +256,7 @@ define <64 x i8> @splatconstant_rotl_v64i8(<64 x i8> %a) nounwind {
; GFNIAVX512: # %bb.0:
; GFNIAVX512-NEXT: vpsrlw $7, %zmm0, %zmm1
; GFNIAVX512-NEXT: vpaddb %zmm0, %zmm0, %zmm0
-; GFNIAVX512-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
+; GFNIAVX512-NEXT: vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
; GFNIAVX512-NEXT: retq
%res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a, <64 x i8> %a, <64 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
ret <64 x i8> %res
diff --git a/llvm/test/CodeGen/X86/gfni-shifts.ll b/llvm/test/CodeGen/X86/gfni-shifts.ll
index 421d0053141af..d5ed003c45092 100644
--- a/llvm/test/CodeGen/X86/gfni-shifts.ll
+++ b/llvm/test/CodeGen/X86/gfni-shifts.ll
@@ -15,11 +15,17 @@ define <16 x i8> @splatconstant_shl_v16i8(<16 x i8> %a) nounwind {
; GFNISSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; GFNISSE-NEXT: retq
;
-; GFNIAVX-LABEL: splatconstant_shl_v16i8:
-; GFNIAVX: # %bb.0:
-; GFNIAVX-NEXT: vpsllw $3, %xmm0, %xmm0
-; GFNIAVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; GFNIAVX-NEXT: retq
+; GFNIAVX1OR2-LABEL: splatconstant_shl_v16i8:
+; GFNIAVX1OR2: # %bb.0:
+; GFNIAVX1OR2-NEXT: vpsllw $3, %xmm0, %xmm0
+; GFNIAVX1OR2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; GFNIAVX1OR2-NEXT: retq
+;
+; GFNIAVX512-LABEL: splatconstant_shl_v16i8:
+; GFNIAVX512: # %bb.0:
+; GFNIAVX512-NEXT: vpsllw $3, %xmm0, %xmm0
+; GFNIAVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; GFNIAVX512-NEXT: retq
%shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <16 x i8> %shift
}
@@ -31,11 +37,17 @@ define <16 x i8> @splatconstant_lshr_v16i8(<16 x i8> %a) nounwind {
; GFNISSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; GFNISSE-NEXT: retq
;
-; GFNIAVX-LABEL: splatconstant_lshr_v16i8:
-; GFNIAVX: # %bb.0:
-; GFNIAVX-NEXT: vpsrlw $7, %xmm0, %xmm0
-; GFNIAVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; GFNIAVX-NEXT: retq
+; GFNIAVX1OR2-LABEL: splatconstant_lshr_v16i8:
+; GFNIAVX1OR2: # %bb.0:
+; GFNIAVX1OR2-NEXT: vpsrlw $7, %xmm0, %xmm0
+; GFNIAVX1OR2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; GFNIAVX1OR2-NEXT: retq
+;
+; GFNIAVX512-LABEL: splatconstant_lshr_v16i8:
+; GFNIAVX512: # %bb.0:
+; GFNIAVX512-NEXT: vpsrlw $7, %xmm0, %xmm0
+; GFNIAVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; GFNIAVX512-NEXT: retq
%shift = lshr <16 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
ret <16 x i8> %shift
}
@@ -63,7 +75,7 @@ define <16 x i8> @splatconstant_ashr_v16i8(<16 x i8> %a) nounwind {
; GFNIAVX512: # %bb.0:
; GFNIAVX512-NEXT: vpsrlw $4, %xmm0, %xmm0
; GFNIAVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; GFNIAVX512-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; GFNIAVX512-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0
; GFNIAVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; GFNIAVX512-NEXT: retq
%shift = ashr <16 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
@@ -104,7 +116,7 @@ define <32 x i8> @splatconstant_shl_v32i8(<32 x i8> %a) nounwind {
; GFNIAVX512-LABEL: splatconstant_shl_v32i8:
; GFNIAVX512: # %bb.0:
; GFNIAVX512-NEXT: vpsllw $6, %ymm0, %ymm0
-; GFNIAVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; GFNIAVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; GFNIAVX512-NEXT: retq
%shift = shl <32 x i8> %a, <i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6>
ret <32 x i8> %shift
@@ -140,7 +152,7 @@ define <32 x i8> @splatconstant_lshr_v32i8(<32 x i8> %a) nounwind {
; GFNIAVX512-LABEL: splatconstant_lshr_v32i8:
; GFNIAVX512: # %bb.0:
; GFNIAVX512-NEXT: vpsrlw $1, %ymm0, %ymm0
-; GFNIAVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; GFNIAVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; GFNIAVX512-NEXT: retq
%shift = lshr <32 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
ret <32 x i8> %shift
@@ -190,7 +202,7 @@ define <32 x i8> @splatconstant_ashr_v32i8(<32 x i8> %a) nounwind {
; GFNIAVX512: # %bb.0:
; GFNIAVX512-NEXT: vpsrlw $2, %ymm0, %ymm0
; GFNIAVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
-; GFNIAVX512-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
+; GFNIAVX512-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm0
; GFNIAVX512-NEXT: vpsubb %ymm1, %ymm0, %ymm0
; GFNIAVX512-NEXT: retq
%shift = ashr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
@@ -244,7 +256,7 @@ define <64 x i8> @splatconstant_shl_v64i8(<64 x i8> %a) nounwind {
; GFNIAVX512-LABEL: splatconstant_shl_v64i8:
; GFNIAVX512: # %bb.0:
; GFNIAVX512-NEXT: vpsllw $5, %zmm0, %zmm0
-; GFNIAVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; GFNIAVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; GFNIAVX512-NEXT: retq
%shift = shl <64 x i8> %a, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
ret <64 x i8> %shift
@@ -293,7 +305,7 @@ define <64 x i8> @splatconstant_lshr_v64i8(<64 x i8> %a) nounwind {
; GFNIAVX512-LABEL: splatconstant_lshr_v64i8:
; GFNIAVX512: # %bb.0:
; GFNIAVX512-NEXT: vpsrlw $7, %zmm0, %zmm0
-; GFNIAVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; GFNIAVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; GFNIAVX512-NEXT: retq
%shift = lshr <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
ret <64 x i8> %shift
@@ -366,9 +378,11 @@ define <64 x i8> @splatconstant_ashr_v64i8(<64 x i8> %a) nounwind {
; GFNIAVX512: # %bb.0:
; GFNIAVX512-NEXT: vpsrlw $1, %zmm0, %zmm0
; GFNIAVX512-NEXT: vmovdqa64 {{.*#+}} zmm1 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
-; GFNIAVX512-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
+; GFNIAVX512-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
; GFNIAVX512-NEXT: vpsubb %zmm1, %zmm0, %zmm0
; GFNIAVX512-NEXT: retq
%shift = ashr <64 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
ret <64 x i8> %shift
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFNIAVX: {{.*}}
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll b/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll
index 65b805c460918..93049f9987a5e 100644
--- a/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll
@@ -5,8 +5,8 @@
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefixes=X86-AVX,X86-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64-SSE2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X64-SSE42
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefixes=X64-AVX,X64-AVX1
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefixes=X64-AVX,X64-AVX2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefixes=X64-AVX,X64-AVX1OR2,X64-AVX1
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefixes=X64-AVX,X64-AVX1OR2,X64-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64-AVX,X64-AVX512
;
@@ -86,21 +86,13 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
; X64-SSE42-NEXT: movq %xmm2, %rax
; X64-SSE42-NEXT: retq
;
-; X64-AVX1-LABEL: test_reduce_v2i64:
-; X64-AVX1: ## %bb.0:
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
-; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; X64-AVX1-NEXT: vmovq %xmm0, %rax
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: test_reduce_v2i64:
-; X64-AVX2: ## %bb.0:
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X64-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
-; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; X64-AVX2-NEXT: vmovq %xmm0, %rax
-; X64-AVX2-NEXT: retq
+; X64-AVX1OR2-LABEL: test_reduce_v2i64:
+; X64-AVX1OR2: ## %bb.0:
+; X64-AVX1OR2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; X64-AVX1OR2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
+; X64-AVX1OR2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; X64-AVX1OR2-NEXT: vmovq %xmm0, %rax
+; X64-AVX1OR2-NEXT: retq
;
; X64-AVX512-LABEL: test_reduce_v2i64:
; X64-AVX512: ## %bb.0:
@@ -249,14 +241,23 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
-; X64-AVX-LABEL: test_reduce_v8i16:
-; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX-NEXT: vmovd %xmm0, %eax
-; X64-AVX-NEXT: xorl $32767, %eax ## imm = 0x7FFF
-; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
-; X64-AVX-NEXT: retq
+; X64-AVX1OR2-LABEL: test_reduce_v8i16:
+; X64-AVX1OR2: ## %bb.0:
+; X64-AVX1OR2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1OR2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1OR2-NEXT: vmovd %xmm0, %eax
+; X64-AVX1OR2-NEXT: xorl $32767, %eax ## imm = 0x7FFF
+; X64-AVX1OR2-NEXT: ## kill: def $ax killed $ax killed $eax
+; X64-AVX1OR2-NEXT: retq
+;
+; X64-AVX512-LABEL: test_reduce_v8i16:
+; X64-AVX512: ## %bb.0:
+; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vmovd %xmm0, %eax
+; X64-AVX512-NEXT: xorl $32767, %eax ## imm = 0x7FFF
+; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
+; X64-AVX512-NEXT: retq
%1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
%2 = icmp sgt <8 x i16> %a0, %1
%3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %1
@@ -370,16 +371,27 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
-; X64-AVX-LABEL: test_reduce_v16i8:
-; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX-NEXT: vmovd %xmm0, %eax
-; X64-AVX-NEXT: xorb $127, %al
-; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
-; X64-AVX-NEXT: retq
+; X64-AVX1OR2-LABEL: test_reduce_v16i8:
+; X64-AVX1OR2: ## %bb.0:
+; X64-AVX1OR2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1OR2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX1OR2-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX1OR2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1OR2-NEXT: vmovd %xmm0, %eax
+; X64-AVX1OR2-NEXT: xorb $127, %al
+; X64-AVX1OR2-NEXT: ## kill: def $al killed $al killed $eax
+; X64-AVX1OR2-NEXT: retq
+;
+; X64-AVX512-LABEL: test_reduce_v16i8:
+; X64-AVX512: ## %bb.0:
+; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vmovd %xmm0, %eax
+; X64-AVX512-NEXT: xorb $127, %al
+; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
+; X64-AVX512-NEXT: retq
%1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%2 = icmp sgt <16 x i8> %a0, %1
%3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1
@@ -807,7 +819,7 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: xorl $32767, %eax ## imm = 0x7FFF
@@ -991,7 +1003,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
@@ -1578,7 +1590,7 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X64-AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: xorl $32767, %eax ## imm = 0x7FFF
@@ -1799,7 +1811,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
@@ -1889,15 +1901,25 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
-; X64-AVX-LABEL: test_reduce_v16i16_v8i16:
-; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX-NEXT: vmovd %xmm0, %eax
-; X64-AVX-NEXT: xorl $32767, %eax ## imm = 0x7FFF
-; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
-; X64-AVX-NEXT: vzeroupper
-; X64-AVX-NEXT: retq
+; X64-AVX1OR2-LABEL: test_reduce_v16i16_v8i16:
+; X64-AVX1OR2: ## %bb.0:
+; X64-AVX1OR2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1OR2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1OR2-NEXT: vmovd %xmm0, %eax
+; X64-AVX1OR2-NEXT: xorl $32767, %eax ## imm = 0x7FFF
+; X64-AVX1OR2-NEXT: ## kill: def $ax killed $ax killed $eax
+; X64-AVX1OR2-NEXT: vzeroupper
+; X64-AVX1OR2-NEXT: retq
+;
+; X64-AVX512-LABEL: test_reduce_v16i16_v8i16:
+; X64-AVX512: ## %bb.0:
+; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vmovd %xmm0, %eax
+; X64-AVX512-NEXT: xorl $32767, %eax ## imm = 0x7FFF
+; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
%1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%2 = icmp sgt <16 x i16> %a0, %1
%3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1
@@ -1966,15 +1988,25 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
-; X64-AVX-LABEL: test_reduce_v32i16_v8i16:
-; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX-NEXT: vmovd %xmm0, %eax
-; X64-AVX-NEXT: xorl $32767, %eax ## imm = 0x7FFF
-; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
-; X64-AVX-NEXT: vzeroupper
-; X64-AVX-NEXT: retq
+; X64-AVX1OR2-LABEL: test_reduce_v32i16_v8i16:
+; X64-AVX1OR2: ## %bb.0:
+; X64-AVX1OR2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1OR2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1OR2-NEXT: vmovd %xmm0, %eax
+; X64-AVX1OR2-NEXT: xorl $32767, %eax ## imm = 0x7FFF
+; X64-AVX1OR2-NEXT: ## kill: def $ax killed $ax killed $eax
+; X64-AVX1OR2-NEXT: vzeroupper
+; X64-AVX1OR2-NEXT: retq
+;
+; X64-AVX512-LABEL: test_reduce_v32i16_v8i16:
+; X64-AVX512: ## %bb.0:
+; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vmovd %xmm0, %eax
+; X64-AVX512-NEXT: xorl $32767, %eax ## imm = 0x7FFF
+; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
%1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%2 = icmp sgt <32 x i16> %a0, %1
%3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1
@@ -2089,17 +2121,29 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
-; X64-AVX-LABEL: test_reduce_v32i8_v16i8:
-; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX-NEXT: vmovd %xmm0, %eax
-; X64-AVX-NEXT: xorb $127, %al
-; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
-; X64-AVX-NEXT: vzeroupper
-; X64-AVX-NEXT: retq
+; X64-AVX1OR2-LABEL: test_reduce_v32i8_v16i8:
+; X64-AVX1OR2: ## %bb.0:
+; X64-AVX1OR2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1OR2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX1OR2-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX1OR2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1OR2-NEXT: vmovd %xmm0, %eax
+; X64-AVX1OR2-NEXT: xorb $127, %al
+; X64-AVX1OR2-NEXT: ## kill: def $al killed $al killed $eax
+; X64-AVX1OR2-NEXT: vzeroupper
+; X64-AVX1OR2-NEXT: retq
+;
+; X64-AVX512-LABEL: test_reduce_v32i8_v16i8:
+; X64-AVX512: ## %bb.0:
+; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vmovd %xmm0, %eax
+; X64-AVX512-NEXT: xorb $127, %al
+; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
%1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%2 = icmp sgt <32 x i8> %a0, %1
%3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1
@@ -2217,17 +2261,29 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
-; X64-AVX-LABEL: test_reduce_v64i8_v16i8:
-; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX-NEXT: vmovd %xmm0, %eax
-; X64-AVX-NEXT: xorb $127, %al
-; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
-; X64-AVX-NEXT: vzeroupper
-; X64-AVX-NEXT: retq
+; X64-AVX1OR2-LABEL: test_reduce_v64i8_v16i8:
+; X64-AVX1OR2: ## %bb.0:
+; X64-AVX1OR2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1OR2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX1OR2-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX1OR2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1OR2-NEXT: vmovd %xmm0, %eax
+; X64-AVX1OR2-NEXT: xorb $127, %al
+; X64-AVX1OR2-NEXT: ## kill: def $al killed $al killed $eax
+; X64-AVX1OR2-NEXT: vzeroupper
+; X64-AVX1OR2-NEXT: retq
+;
+; X64-AVX512-LABEL: test_reduce_v64i8_v16i8:
+; X64-AVX512: ## %bb.0:
+; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vmovd %xmm0, %eax
+; X64-AVX512-NEXT: xorb $127, %al
+; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
%1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%2 = icmp sgt <64 x i8> %a0, %1
%3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll b/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
index c66fb7ec28090..47bb0957f3fbb 100644
--- a/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
@@ -5,8 +5,8 @@
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefixes=X86-AVX,X86-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64-SSE2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X64-SSE42
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefixes=X64-AVX,X64-AVX1
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefixes=X64-AVX,X64-AVX2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefixes=X64-AVX,X64-AVX1OR2,X64-AVX1
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefixes=X64-AVX,X64-AVX1OR2,X64-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64-AVX,X64-AVX512
;
@@ -88,21 +88,13 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
; X64-SSE42-NEXT: movq %xmm2, %rax
; X64-SSE42-NEXT: retq
;
-; X64-AVX1-LABEL: test_reduce_v2i64:
-; X64-AVX1: ## %bb.0:
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
-; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; X64-AVX1-NEXT: vmovq %xmm0, %rax
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: test_reduce_v2i64:
-; X64-AVX2: ## %bb.0:
-; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X64-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
-; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; X64-AVX2-NEXT: vmovq %xmm0, %rax
-; X64-AVX2-NEXT: retq
+; X64-AVX1OR2-LABEL: test_reduce_v2i64:
+; X64-AVX1OR2: ## %bb.0:
+; X64-AVX1OR2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; X64-AVX1OR2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; X64-AVX1OR2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; X64-AVX1OR2-NEXT: vmovq %xmm0, %rax
+; X64-AVX1OR2-NEXT: retq
;
; X64-AVX512-LABEL: test_reduce_v2i64:
; X64-AVX512: ## %bb.0:
@@ -251,14 +243,23 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
-; X64-AVX-LABEL: test_reduce_v8i16:
-; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX-NEXT: vmovd %xmm0, %eax
-; X64-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000
-; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
-; X64-AVX-NEXT: retq
+; X64-AVX1OR2-LABEL: test_reduce_v8i16:
+; X64-AVX1OR2: ## %bb.0:
+; X64-AVX1OR2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1OR2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1OR2-NEXT: vmovd %xmm0, %eax
+; X64-AVX1OR2-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X64-AVX1OR2-NEXT: ## kill: def $ax killed $ax killed $eax
+; X64-AVX1OR2-NEXT: retq
+;
+; X64-AVX512-LABEL: test_reduce_v8i16:
+; X64-AVX512: ## %bb.0:
+; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vmovd %xmm0, %eax
+; X64-AVX512-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
+; X64-AVX512-NEXT: retq
%1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
%2 = icmp slt <8 x i16> %a0, %1
%3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %1
@@ -372,16 +373,27 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
-; X64-AVX-LABEL: test_reduce_v16i8:
-; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX-NEXT: vmovd %xmm0, %eax
-; X64-AVX-NEXT: addb $-128, %al
-; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
-; X64-AVX-NEXT: retq
+; X64-AVX1OR2-LABEL: test_reduce_v16i8:
+; X64-AVX1OR2: ## %bb.0:
+; X64-AVX1OR2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1OR2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX1OR2-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX1OR2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1OR2-NEXT: vmovd %xmm0, %eax
+; X64-AVX1OR2-NEXT: addb $-128, %al
+; X64-AVX1OR2-NEXT: ## kill: def $al killed $al killed $eax
+; X64-AVX1OR2-NEXT: retq
+;
+; X64-AVX512-LABEL: test_reduce_v16i8:
+; X64-AVX512: ## %bb.0:
+; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vmovd %xmm0, %eax
+; X64-AVX512-NEXT: addb $-128, %al
+; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
+; X64-AVX512-NEXT: retq
%1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%2 = icmp slt <16 x i8> %a0, %1
%3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1
@@ -811,7 +823,7 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: xorl $32768, %eax ## imm = 0x8000
@@ -995,7 +1007,7 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
@@ -1582,7 +1594,7 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X64-AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: xorl $32768, %eax ## imm = 0x8000
@@ -1803,7 +1815,7 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
@@ -1893,15 +1905,25 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
-; X64-AVX-LABEL: test_reduce_v16i16_v8i16:
-; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX-NEXT: vmovd %xmm0, %eax
-; X64-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000
-; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
-; X64-AVX-NEXT: vzeroupper
-; X64-AVX-NEXT: retq
+; X64-AVX1OR2-LABEL: test_reduce_v16i16_v8i16:
+; X64-AVX1OR2: ## %bb.0:
+; X64-AVX1OR2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1OR2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1OR2-NEXT: vmovd %xmm0, %eax
+; X64-AVX1OR2-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X64-AVX1OR2-NEXT: ## kill: def $ax killed $ax killed $eax
+; X64-AVX1OR2-NEXT: vzeroupper
+; X64-AVX1OR2-NEXT: retq
+;
+; X64-AVX512-LABEL: test_reduce_v16i16_v8i16:
+; X64-AVX512: ## %bb.0:
+; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vmovd %xmm0, %eax
+; X64-AVX512-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
%1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%2 = icmp slt <16 x i16> %a0, %1
%3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1
@@ -1970,15 +1992,25 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
-; X64-AVX-LABEL: test_reduce_v32i16_v8i16:
-; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX-NEXT: vmovd %xmm0, %eax
-; X64-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000
-; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
-; X64-AVX-NEXT: vzeroupper
-; X64-AVX-NEXT: retq
+; X64-AVX1OR2-LABEL: test_reduce_v32i16_v8i16:
+; X64-AVX1OR2: ## %bb.0:
+; X64-AVX1OR2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1OR2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1OR2-NEXT: vmovd %xmm0, %eax
+; X64-AVX1OR2-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X64-AVX1OR2-NEXT: ## kill: def $ax killed $ax killed $eax
+; X64-AVX1OR2-NEXT: vzeroupper
+; X64-AVX1OR2-NEXT: retq
+;
+; X64-AVX512-LABEL: test_reduce_v32i16_v8i16:
+; X64-AVX512: ## %bb.0:
+; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vmovd %xmm0, %eax
+; X64-AVX512-NEXT: xorl $32768, %eax ## imm = 0x8000
+; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
%1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%2 = icmp slt <32 x i16> %a0, %1
%3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1
@@ -2093,17 +2125,29 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
-; X64-AVX-LABEL: test_reduce_v32i8_v16i8:
-; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX-NEXT: vmovd %xmm0, %eax
-; X64-AVX-NEXT: addb $-128, %al
-; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
-; X64-AVX-NEXT: vzeroupper
-; X64-AVX-NEXT: retq
+; X64-AVX1OR2-LABEL: test_reduce_v32i8_v16i8:
+; X64-AVX1OR2: ## %bb.0:
+; X64-AVX1OR2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1OR2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX1OR2-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX1OR2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1OR2-NEXT: vmovd %xmm0, %eax
+; X64-AVX1OR2-NEXT: addb $-128, %al
+; X64-AVX1OR2-NEXT: ## kill: def $al killed $al killed $eax
+; X64-AVX1OR2-NEXT: vzeroupper
+; X64-AVX1OR2-NEXT: retq
+;
+; X64-AVX512-LABEL: test_reduce_v32i8_v16i8:
+; X64-AVX512: ## %bb.0:
+; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vmovd %xmm0, %eax
+; X64-AVX512-NEXT: addb $-128, %al
+; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
%1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%2 = icmp slt <32 x i8> %a0, %1
%3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1
@@ -2221,17 +2265,29 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
-; X64-AVX-LABEL: test_reduce_v64i8_v16i8:
-; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX-NEXT: vmovd %xmm0, %eax
-; X64-AVX-NEXT: addb $-128, %al
-; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
-; X64-AVX-NEXT: vzeroupper
-; X64-AVX-NEXT: retq
+; X64-AVX1OR2-LABEL: test_reduce_v64i8_v16i8:
+; X64-AVX1OR2: ## %bb.0:
+; X64-AVX1OR2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1OR2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX1OR2-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX1OR2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1OR2-NEXT: vmovd %xmm0, %eax
+; X64-AVX1OR2-NEXT: addb $-128, %al
+; X64-AVX1OR2-NEXT: ## kill: def $al killed $al killed $eax
+; X64-AVX1OR2-NEXT: vzeroupper
+; X64-AVX1OR2-NEXT: retq
+;
+; X64-AVX512-LABEL: test_reduce_v64i8_v16i8:
+; X64-AVX512: ## %bb.0:
+; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vmovd %xmm0, %eax
+; X64-AVX512-NEXT: addb $-128, %al
+; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
%1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%2 = icmp slt <64 x i8> %a0, %1
%3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
diff --git a/llvm/test/CodeGen/X86/i64-to-float.ll b/llvm/test/CodeGen/X86/i64-to-float.ll
index ae7301b211de8..b5974f03563fc 100644
--- a/llvm/test/CodeGen/X86/i64-to-float.ll
+++ b/llvm/test/CodeGen/X86/i64-to-float.ll
@@ -305,16 +305,16 @@ define <2 x double> @clamp_sitofp_2i64_2f64(<2 x i64> %a) nounwind {
;
; X86-AVX512F-LABEL: clamp_sitofp_2i64_2f64:
; X86-AVX512F: # %bb.0:
-; X86-AVX512F-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
-; X86-AVX512F-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX512F-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
+; X86-AVX512F-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
; X86-AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-AVX512F-NEXT: vcvtdq2pd %xmm0, %xmm0
; X86-AVX512F-NEXT: retl
;
; X86-AVX512DQ-LABEL: clamp_sitofp_2i64_2f64:
; X86-AVX512DQ: # %bb.0:
-; X86-AVX512DQ-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
-; X86-AVX512DQ-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX512DQ-NEXT: vpmaxsq {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
+; X86-AVX512DQ-NEXT: vpminsq {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
; X86-AVX512DQ-NEXT: vcvtqq2pd %xmm0, %xmm0
; X86-AVX512DQ-NEXT: retl
;
diff --git a/llvm/test/CodeGen/X86/icmp-pow2-diff.ll b/llvm/test/CodeGen/X86/icmp-pow2-diff.ll
index d5ad852537be3..0b1137ff96643 100644
--- a/llvm/test/CodeGen/X86/icmp-pow2-diff.ll
+++ b/llvm/test/CodeGen/X86/icmp-pow2-diff.ll
@@ -182,7 +182,7 @@ define <8 x i1> @andnot_ne_v8i16_todo_no_splat(<8 x i16> %x) nounwind {
define <8 x i1> @andnot_ne_v8i16(<8 x i16> %x) nounwind {
; AVX512-LABEL: andnot_ne_v8i16:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpandnd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
@@ -249,7 +249,7 @@ define <16 x i1> @andnot_ne_v16i8_fail_max_not_n1(<16 x i8> %x) nounwind {
define <16 x i1> @andnot_ne_v16i8(<16 x i8> %x) nounwind {
; AVX512-LABEL: andnot_ne_v16i8:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpandnd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll b/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll
index f70b7a227172a..7e6cfc56574f5 100644
--- a/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll
+++ b/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll
@@ -2554,7 +2554,7 @@ define <16 x i8> @vec128_i8_signed_reg_reg(<16 x i8> %a1, <16 x i8> %a2) nounwin
; AVX512VL-FALLBACK-NEXT: vpsubb %xmm2, %xmm3, %xmm2
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %xmm2, %xmm2
; AVX512VL-FALLBACK-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1
-; AVX512VL-FALLBACK-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2
+; AVX512VL-FALLBACK-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm2
; AVX512VL-FALLBACK-NEXT: vpsubb %xmm1, %xmm2, %xmm1
; AVX512VL-FALLBACK-NEXT: vpaddb %xmm0, %xmm1, %xmm0
; AVX512VL-FALLBACK-NEXT: retq
@@ -2587,7 +2587,7 @@ define <16 x i8> @vec128_i8_signed_reg_reg(<16 x i8> %a1, <16 x i8> %a2) nounwin
; AVX512VLBW-NEXT: vpmaxsb %xmm1, %xmm0, %xmm1
; AVX512VLBW-NEXT: vpsubb %xmm2, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpsrlw $1, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm2, %xmm1 {%k1}
; AVX512VLBW-NEXT: vpaddb %xmm0, %xmm1, %xmm0
@@ -2787,7 +2787,7 @@ define <16 x i8> @vec128_i8_unsigned_reg_reg(<16 x i8> %a1, <16 x i8> %a2) nounw
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX512VL-FALLBACK-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm2
; AVX512VL-FALLBACK-NEXT: vpternlogq $15, %xmm2, %xmm2, %xmm2
-; AVX512VL-FALLBACK-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm1
+; AVX512VL-FALLBACK-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm2, %xmm1
; AVX512VL-FALLBACK-NEXT: vpsubb %xmm2, %xmm1, %xmm1
; AVX512VL-FALLBACK-NEXT: vpaddb %xmm0, %xmm1, %xmm0
; AVX512VL-FALLBACK-NEXT: retq
@@ -2820,7 +2820,7 @@ define <16 x i8> @vec128_i8_unsigned_reg_reg(<16 x i8> %a1, <16 x i8> %a2) nounw
; AVX512VLBW-NEXT: vpmaxub %xmm1, %xmm0, %xmm1
; AVX512VLBW-NEXT: vpsubb %xmm2, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpsrlw $1, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm2, %xmm1 {%k1}
; AVX512VLBW-NEXT: vpaddb %xmm0, %xmm1, %xmm0
@@ -3029,7 +3029,7 @@ define <16 x i8> @vec128_i8_signed_mem_reg(ptr %a1_addr, <16 x i8> %a2) nounwind
; AVX512VL-FALLBACK-NEXT: vpsubb %xmm2, %xmm3, %xmm2
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %xmm2, %xmm2
; AVX512VL-FALLBACK-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
-; AVX512VL-FALLBACK-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
+; AVX512VL-FALLBACK-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm2
; AVX512VL-FALLBACK-NEXT: vpsubb %xmm0, %xmm2, %xmm0
; AVX512VL-FALLBACK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX512VL-FALLBACK-NEXT: retq
@@ -3063,7 +3063,7 @@ define <16 x i8> @vec128_i8_signed_mem_reg(ptr %a1_addr, <16 x i8> %a2) nounwind
; AVX512VLBW-NEXT: vpmaxsb %xmm0, %xmm1, %xmm0
; AVX512VLBW-NEXT: vpsubb %xmm2, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpsrlw $1, %xmm0, %xmm0
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpsubb %xmm0, %xmm2, %xmm0 {%k1}
; AVX512VLBW-NEXT: vpaddb %xmm1, %xmm0, %xmm0
@@ -3269,7 +3269,7 @@ define <16 x i8> @vec128_i8_signed_reg_mem(<16 x i8> %a1, ptr %a2_addr) nounwind
; AVX512VL-FALLBACK-NEXT: vpsubb %xmm2, %xmm3, %xmm2
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %xmm2, %xmm2
; AVX512VL-FALLBACK-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1
-; AVX512VL-FALLBACK-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2
+; AVX512VL-FALLBACK-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm2
; AVX512VL-FALLBACK-NEXT: vpsubb %xmm1, %xmm2, %xmm1
; AVX512VL-FALLBACK-NEXT: vpaddb %xmm0, %xmm1, %xmm0
; AVX512VL-FALLBACK-NEXT: retq
@@ -3303,7 +3303,7 @@ define <16 x i8> @vec128_i8_signed_reg_mem(<16 x i8> %a1, ptr %a2_addr) nounwind
; AVX512VLBW-NEXT: vpmaxsb %xmm1, %xmm0, %xmm1
; AVX512VLBW-NEXT: vpsubb %xmm2, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpsrlw $1, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm2, %xmm1 {%k1}
; AVX512VLBW-NEXT: vpaddb %xmm0, %xmm1, %xmm0
@@ -3518,7 +3518,7 @@ define <16 x i8> @vec128_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind
; AVX512VL-FALLBACK-NEXT: vpsubb %xmm2, %xmm3, %xmm2
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %xmm2, %xmm2
; AVX512VL-FALLBACK-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1
-; AVX512VL-FALLBACK-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2
+; AVX512VL-FALLBACK-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm2
; AVX512VL-FALLBACK-NEXT: vpsubb %xmm1, %xmm2, %xmm1
; AVX512VL-FALLBACK-NEXT: vpaddb %xmm0, %xmm1, %xmm0
; AVX512VL-FALLBACK-NEXT: retq
@@ -3553,7 +3553,7 @@ define <16 x i8> @vec128_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind
; AVX512VLBW-NEXT: vpmaxsb %xmm1, %xmm0, %xmm1
; AVX512VLBW-NEXT: vpsubb %xmm2, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpsrlw $1, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm2, %xmm1 {%k1}
; AVX512VLBW-NEXT: vpaddb %xmm0, %xmm1, %xmm0
diff --git a/llvm/test/CodeGen/X86/midpoint-int-vec-256.ll b/llvm/test/CodeGen/X86/midpoint-int-vec-256.ll
index 4242743614503..960a55f01aec9 100644
--- a/llvm/test/CodeGen/X86/midpoint-int-vec-256.ll
+++ b/llvm/test/CodeGen/X86/midpoint-int-vec-256.ll
@@ -2026,7 +2026,7 @@ define <32 x i8> @vec256_i8_signed_reg_reg(<32 x i8> %a1, <32 x i8> %a2) nounwin
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm2, %ymm3, %ymm2
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm2, %ymm2
; AVX512VL-FALLBACK-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm1
-; AVX512VL-FALLBACK-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2
+; AVX512VL-FALLBACK-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm2
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm1, %ymm2, %ymm1
; AVX512VL-FALLBACK-NEXT: vpaddb %ymm0, %ymm1, %ymm0
; AVX512VL-FALLBACK-NEXT: retq
@@ -2058,7 +2058,7 @@ define <32 x i8> @vec256_i8_signed_reg_reg(<32 x i8> %a1, <32 x i8> %a2) nounwin
; AVX512VLBW-NEXT: vpmaxsb %ymm1, %ymm0, %ymm1
; AVX512VLBW-NEXT: vpsubb %ymm2, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpsrlw $1, %ymm1, %ymm1
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpsubb %ymm1, %ymm2, %ymm1 {%k1}
; AVX512VLBW-NEXT: vpaddb %ymm0, %ymm1, %ymm0
@@ -2215,7 +2215,7 @@ define <32 x i8> @vec256_i8_unsigned_reg_reg(<32 x i8> %a1, <32 x i8> %a2) nounw
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512VL-FALLBACK-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
; AVX512VL-FALLBACK-NEXT: vpternlogq $15, %ymm2, %ymm2, %ymm2
-; AVX512VL-FALLBACK-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm1
+; AVX512VL-FALLBACK-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm1
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm2, %ymm1, %ymm1
; AVX512VL-FALLBACK-NEXT: vpaddb %ymm0, %ymm1, %ymm0
; AVX512VL-FALLBACK-NEXT: retq
@@ -2247,7 +2247,7 @@ define <32 x i8> @vec256_i8_unsigned_reg_reg(<32 x i8> %a1, <32 x i8> %a2) nounw
; AVX512VLBW-NEXT: vpmaxub %ymm1, %ymm0, %ymm1
; AVX512VLBW-NEXT: vpsubb %ymm2, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpsrlw $1, %ymm1, %ymm1
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpsubb %ymm1, %ymm2, %ymm1 {%k1}
; AVX512VLBW-NEXT: vpaddb %ymm0, %ymm1, %ymm0
@@ -2404,7 +2404,7 @@ define <32 x i8> @vec256_i8_signed_mem_reg(ptr %a1_addr, <32 x i8> %a2) nounwind
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm2, %ymm3, %ymm2
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm2, %ymm2
; AVX512VL-FALLBACK-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
-; AVX512VL-FALLBACK-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2
+; AVX512VL-FALLBACK-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm2
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm0, %ymm2, %ymm0
; AVX512VL-FALLBACK-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; AVX512VL-FALLBACK-NEXT: retq
@@ -2437,7 +2437,7 @@ define <32 x i8> @vec256_i8_signed_mem_reg(ptr %a1_addr, <32 x i8> %a2) nounwind
; AVX512VLBW-NEXT: vpmaxsb %ymm0, %ymm1, %ymm0
; AVX512VLBW-NEXT: vpsubb %ymm2, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpsrlw $1, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpsubb %ymm0, %ymm2, %ymm0 {%k1}
; AVX512VLBW-NEXT: vpaddb %ymm1, %ymm0, %ymm0
@@ -2593,7 +2593,7 @@ define <32 x i8> @vec256_i8_signed_reg_mem(<32 x i8> %a1, ptr %a2_addr) nounwind
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm2, %ymm3, %ymm2
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm2, %ymm2
; AVX512VL-FALLBACK-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm1
-; AVX512VL-FALLBACK-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2
+; AVX512VL-FALLBACK-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm2
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm1, %ymm2, %ymm1
; AVX512VL-FALLBACK-NEXT: vpaddb %ymm0, %ymm1, %ymm0
; AVX512VL-FALLBACK-NEXT: retq
@@ -2626,7 +2626,7 @@ define <32 x i8> @vec256_i8_signed_reg_mem(<32 x i8> %a1, ptr %a2_addr) nounwind
; AVX512VLBW-NEXT: vpmaxsb %ymm1, %ymm0, %ymm1
; AVX512VLBW-NEXT: vpsubb %ymm2, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpsrlw $1, %ymm1, %ymm1
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpsubb %ymm1, %ymm2, %ymm1 {%k1}
; AVX512VLBW-NEXT: vpaddb %ymm0, %ymm1, %ymm0
@@ -2787,7 +2787,7 @@ define <32 x i8> @vec256_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm2, %ymm3, %ymm2
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm2, %ymm2
; AVX512VL-FALLBACK-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm1
-; AVX512VL-FALLBACK-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2
+; AVX512VL-FALLBACK-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm2
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm1, %ymm2, %ymm1
; AVX512VL-FALLBACK-NEXT: vpaddb %ymm0, %ymm1, %ymm0
; AVX512VL-FALLBACK-NEXT: retq
@@ -2821,7 +2821,7 @@ define <32 x i8> @vec256_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind
; AVX512VLBW-NEXT: vpmaxsb %ymm1, %ymm0, %ymm1
; AVX512VLBW-NEXT: vpsubb %ymm2, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpsrlw $1, %ymm1, %ymm1
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpsubb %ymm1, %ymm2, %ymm1 {%k1}
; AVX512VLBW-NEXT: vpaddb %ymm0, %ymm1, %ymm0
diff --git a/llvm/test/CodeGen/X86/midpoint-int-vec-512.ll b/llvm/test/CodeGen/X86/midpoint-int-vec-512.ll
index 364b3612410c7..c2f8cbf81b5d8 100644
--- a/llvm/test/CodeGen/X86/midpoint-int-vec-512.ll
+++ b/llvm/test/CodeGen/X86/midpoint-int-vec-512.ll
@@ -693,7 +693,7 @@ define <64 x i8> @vec512_i8_signed_reg_reg(<64 x i8> %a1, <64 x i8> %a2) nounwin
; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512F-NEXT: vpsrlw $1, %ymm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT: vpxor %xmm5, %xmm5, %xmm5
; AVX512F-NEXT: vpsubb %ymm2, %ymm5, %ymm2
@@ -722,7 +722,7 @@ define <64 x i8> @vec512_i8_signed_reg_reg(<64 x i8> %a1, <64 x i8> %a2) nounwin
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm2, %ymm2
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
-; AVX512VL-FALLBACK-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512VL-FALLBACK-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512VL-FALLBACK-NEXT: vpxor %xmm5, %xmm5, %xmm5
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm2, %ymm5, %ymm2
@@ -742,7 +742,7 @@ define <64 x i8> @vec512_i8_signed_reg_reg(<64 x i8> %a1, <64 x i8> %a2) nounwin
; AVX512BW-NEXT: vpmaxsb %zmm1, %zmm0, %zmm1
; AVX512BW-NEXT: vpsubb %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpsrlw $1, %zmm1, %zmm1
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT: vpsubb %zmm1, %zmm2, %zmm1 {%k1}
; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
@@ -775,7 +775,7 @@ define <64 x i8> @vec512_i8_unsigned_reg_reg(<64 x i8> %a1, <64 x i8> %a2) nounw
; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512F-NEXT: vpsrlw $1, %ymm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512F-NEXT: vpsubb %ymm2, %ymm4, %ymm2
@@ -804,7 +804,7 @@ define <64 x i8> @vec512_i8_unsigned_reg_reg(<64 x i8> %a1, <64 x i8> %a2) nounw
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm2, %ymm2
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
-; AVX512VL-FALLBACK-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512VL-FALLBACK-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512VL-FALLBACK-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm2, %ymm4, %ymm2
@@ -824,7 +824,7 @@ define <64 x i8> @vec512_i8_unsigned_reg_reg(<64 x i8> %a1, <64 x i8> %a2) nounw
; AVX512BW-NEXT: vpmaxub %zmm1, %zmm0, %zmm1
; AVX512BW-NEXT: vpsubb %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpsrlw $1, %zmm1, %zmm1
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT: vpsubb %zmm1, %zmm2, %zmm1 {%k1}
; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
@@ -860,7 +860,7 @@ define <64 x i8> @vec512_i8_signed_mem_reg(ptr %a1_addr, <64 x i8> %a2) nounwind
; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpxor %xmm5, %xmm5, %xmm5
; AVX512F-NEXT: vpsubb %ymm1, %ymm5, %ymm1
@@ -890,7 +890,7 @@ define <64 x i8> @vec512_i8_signed_mem_reg(ptr %a1_addr, <64 x i8> %a2) nounwind
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm0, %ymm0
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512VL-FALLBACK-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512VL-FALLBACK-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512VL-FALLBACK-NEXT: vpxor %xmm5, %xmm5, %xmm5
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm1, %ymm5, %ymm1
@@ -911,7 +911,7 @@ define <64 x i8> @vec512_i8_signed_mem_reg(ptr %a1_addr, <64 x i8> %a2) nounwind
; AVX512BW-NEXT: vpmaxsb %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: vpsubb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT: vpsubb %zmm0, %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
@@ -946,7 +946,7 @@ define <64 x i8> @vec512_i8_signed_reg_mem(<64 x i8> %a1, ptr %a2_addr) nounwind
; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512F-NEXT: vpsrlw $1, %ymm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT: vpxor %xmm5, %xmm5, %xmm5
; AVX512F-NEXT: vpsubb %ymm2, %ymm5, %ymm2
@@ -976,7 +976,7 @@ define <64 x i8> @vec512_i8_signed_reg_mem(<64 x i8> %a1, ptr %a2_addr) nounwind
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm2, %ymm2
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
-; AVX512VL-FALLBACK-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512VL-FALLBACK-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512VL-FALLBACK-NEXT: vpxor %xmm5, %xmm5, %xmm5
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm2, %ymm5, %ymm2
@@ -997,7 +997,7 @@ define <64 x i8> @vec512_i8_signed_reg_mem(<64 x i8> %a1, ptr %a2_addr) nounwind
; AVX512BW-NEXT: vpmaxsb %zmm1, %zmm0, %zmm1
; AVX512BW-NEXT: vpsubb %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpsrlw $1, %zmm1, %zmm1
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT: vpsubb %zmm1, %zmm2, %zmm1 {%k1}
; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
@@ -1033,7 +1033,7 @@ define <64 x i8> @vec512_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind
; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpxor %xmm5, %xmm5, %xmm5
; AVX512F-NEXT: vpsubb %ymm1, %ymm5, %ymm1
@@ -1064,7 +1064,7 @@ define <64 x i8> @vec512_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm0, %ymm0
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512VL-FALLBACK-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512VL-FALLBACK-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512VL-FALLBACK-NEXT: vpxor %xmm5, %xmm5, %xmm5
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm1, %ymm5, %ymm1
@@ -1086,7 +1086,7 @@ define <64 x i8> @vec512_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind
; AVX512BW-NEXT: vpmaxsb %zmm1, %zmm0, %zmm1
; AVX512BW-NEXT: vpsubb %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpsrlw $1, %zmm1, %zmm1
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT: vpsubb %zmm1, %zmm2, %zmm1 {%k1}
; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
diff --git a/llvm/test/CodeGen/X86/min-legal-vector-width.ll b/llvm/test/CodeGen/X86/min-legal-vector-width.ll
index 9a01252425465..57fdd3efcf231 100644
--- a/llvm/test/CodeGen/X86/min-legal-vector-width.ll
+++ b/llvm/test/CodeGen/X86/min-legal-vector-width.ll
@@ -1928,7 +1928,7 @@ define dso_local void @cmp_v8i64_zext(<8 x i64>* %xptr, <8 x i64>* %yptr, <8 x i
define <16 x i8> @var_rotate_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind "min-legal-vector-width"="256" {
; CHECK-LABEL: var_rotate_v16i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; CHECK-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
@@ -1950,7 +1950,7 @@ define <16 x i8> @var_rotate_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind "min-leg
define <32 x i8> @var_rotate_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind "min-legal-vector-width"="256" {
; CHECK-LABEL: var_rotate_v32i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; CHECK-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31]
; CHECK-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
@@ -2025,7 +2025,7 @@ define <32 x i8> @splatconstant_rotate_mask_v32i8(<32 x i8> %a) nounwind "min-le
; CHECK-NEXT: vpsllw $4, %ymm0, %ymm1
; CHECK-NEXT: vpsrlw $4, %ymm0, %ymm0
; CHECK-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
-; CHECK-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; CHECK-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; CHECK-NEXT: retq
%shl = shl <32 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
%lshr = lshr <32 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
index 6db6b7bc4dc13..1e31d88e88f3f 100644
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -1315,7 +1315,7 @@ define i1 @allzeros_v64i8_and1(<64 x i8> %arg) {
;
; AVX512-LABEL: allzeros_v64i8_and1:
; AVX512: # %bb.0:
-; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
@@ -1572,7 +1572,7 @@ define i1 @allzeros_v32i16_and1(<32 x i16> %arg) {
;
; AVX512-LABEL: allzeros_v32i16_and1:
; AVX512: # %bb.0:
-; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
@@ -2461,7 +2461,7 @@ define i1 @allzeros_v64i8_and4(<64 x i8> %arg) {
;
; AVX512-LABEL: allzeros_v64i8_and4:
; AVX512: # %bb.0:
-; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
@@ -2718,7 +2718,7 @@ define i1 @allzeros_v32i16_and4(<32 x i16> %arg) {
;
; AVX512-LABEL: allzeros_v32i16_and4:
; AVX512: # %bb.0:
-; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll
index dcbf5cf2f0b75..556a2be0b138a 100644
--- a/llvm/test/CodeGen/X86/opt-pipeline.ll
+++ b/llvm/test/CodeGen/X86/opt-pipeline.ll
@@ -89,7 +89,7 @@
; CHECK-NEXT: Local Dynamic TLS Access Clean-up
; CHECK-NEXT: X86 PIC Global Base Reg Initialization
; CHECK-NEXT: Argument Stack Rebase
-; CHECK-NEXT: Finalize ISel and expand pseudo-instructions
+; CHECK-NEXT: Finalize ISel and expand pseudo-instructions
; CHECK-NEXT: X86 Domain Reassignment Pass
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Early Tail Duplication
@@ -204,6 +204,7 @@
; CHECK-NEXT: X86 Atom pad short functions
; CHECK-NEXT: X86 LEA Fixup
; CHECK-NEXT: X86 Fixup Inst Tuning
+; CHECK-NEXT: X86 Fixup Vector Constants
; CHECK-NEXT: Compressing EVEX instrs to VEX encoding when possible
; CHECK-NEXT: X86 Discriminate Memory Operands
; CHECK-NEXT: X86 Insert Cache Prefetches
diff --git a/llvm/test/CodeGen/X86/paddus.ll b/llvm/test/CodeGen/X86/paddus.ll
index 766c681cd364b..1f0a6b8a68eee 100644
--- a/llvm/test/CodeGen/X86/paddus.ll
+++ b/llvm/test/CodeGen/X86/paddus.ll
@@ -131,7 +131,7 @@ define <16 x i8> @test5(<16 x i8> %x) {
;
; AVX512-LABEL: test5:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm1
; AVX512-NEXT: vpcmpltub %xmm0, %xmm1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqu8 %xmm0, %xmm1 {%k1}
@@ -350,7 +350,7 @@ define <32 x i8> @test11(<32 x i8> %x) {
;
; AVX512-LABEL: test11:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm1
; AVX512-NEXT: vpcmpltub %ymm0, %ymm1, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqu8 %ymm0, %ymm1 {%k1}
@@ -658,7 +658,7 @@ define <64 x i8> @test17(<64 x i8> %x) {
;
; AVX512-LABEL: test17:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpxorq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
+; AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm1
; AVX512-NEXT: vpcmpltub %zmm0, %zmm1, %k1
; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
; AVX512-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1}
@@ -852,7 +852,7 @@ define <8 x i16> @test23(<8 x i16> %x) {
;
; AVX512-LABEL: test23:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm1
; AVX512-NEXT: vpcmpltuw %xmm0, %xmm1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1}
@@ -1103,7 +1103,7 @@ define <16 x i16> @test29(<16 x i16> %x) {
;
; AVX512-LABEL: test29:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
+; AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm1
; AVX512-NEXT: vpcmpltuw %ymm0, %ymm1, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1}
@@ -1467,7 +1467,7 @@ define <32 x i16> @test35(<32 x i16> %x) {
;
; AVX512-LABEL: test35:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpxorq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
+; AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm1
; AVX512-NEXT: vpcmpltuw %zmm0, %zmm1, %k1
; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
; AVX512-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
diff --git a/llvm/test/CodeGen/X86/prefer-avx256-lzcnt.ll b/llvm/test/CodeGen/X86/prefer-avx256-lzcnt.ll
index 056aeaba9271c..0ce83b190ead8 100644
--- a/llvm/test/CodeGen/X86/prefer-avx256-lzcnt.ll
+++ b/llvm/test/CodeGen/X86/prefer-avx256-lzcnt.ll
@@ -41,7 +41,7 @@ define <16 x i8> @testv16i8(<16 x i8> %in) {
; AVX256-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX256-NEXT: vpshufb %xmm0, %xmm1, %xmm2
; AVX256-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX256-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX256-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX256-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX256-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm3
; AVX256-NEXT: vpand %xmm3, %xmm2, %xmm2
@@ -92,7 +92,7 @@ define <32 x i8> @testv32i8(<32 x i8> %in) {
; AVX256-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX256-NEXT: vpshufb %ymm0, %ymm1, %ymm2
; AVX256-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX256-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX256-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; AVX256-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX256-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm3
; AVX256-NEXT: vpand %ymm3, %ymm2, %ymm2
diff --git a/llvm/test/CodeGen/X86/prefer-avx256-mulo.ll b/llvm/test/CodeGen/X86/prefer-avx256-mulo.ll
index ef7a128b1fab5..e3a608abfda43 100644
--- a/llvm/test/CodeGen/X86/prefer-avx256-mulo.ll
+++ b/llvm/test/CodeGen/X86/prefer-avx256-mulo.ll
@@ -11,7 +11,7 @@ define <16 x i1> @smulo_v16i8(<16 x i8> %a0, <16 x i8> %a1, ptr %p2) nounwind {
; AVX256-NEXT: vpsrlw $8, %ymm0, %ymm1
; AVX256-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX256-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
-; AVX256-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX256-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; AVX256-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX256-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX256-NEXT: vpxor %xmm2, %xmm2, %xmm2
@@ -75,7 +75,7 @@ define <16 x i1> @umulo_v16i8(<16 x i8> %a0, <16 x i8> %a1, ptr %p2) nounwind {
; AVX256-NEXT: vptestmd %ymm2, %ymm2, %k1
; AVX256-NEXT: vpmovsxbd %xmm1, %ymm1
; AVX256-NEXT: vptestmd %ymm1, %ymm1, %k2
-; AVX256-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX256-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; AVX256-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX256-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX256-NEXT: vmovdqa %xmm0, (%rdi)
diff --git a/llvm/test/CodeGen/X86/prefer-avx256-shift.ll b/llvm/test/CodeGen/X86/prefer-avx256-shift.ll
index 79d8aa27faf74..bf04c8d435559 100644
--- a/llvm/test/CodeGen/X86/prefer-avx256-shift.ll
+++ b/llvm/test/CodeGen/X86/prefer-avx256-shift.ll
@@ -11,10 +11,10 @@ define <32 x i8> @var_shl_v32i8(<32 x i8> %a, <32 x i8> %b) {
; AVX256: # %bb.0:
; AVX256-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX256-NEXT: vpsllw $4, %ymm0, %ymm2
-; AVX256-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
+; AVX256-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm2
; AVX256-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX256-NEXT: vpsllw $2, %ymm0, %ymm2
-; AVX256-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
+; AVX256-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm2
; AVX256-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX256-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX256-NEXT: vpaddb %ymm0, %ymm0, %ymm2
@@ -34,10 +34,10 @@ define <32 x i8> @var_shl_v32i8(<32 x i8> %a, <32 x i8> %b) {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm2
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm2
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm2
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm2
@@ -115,10 +115,10 @@ define <16 x i8> @var_shl_v16i8(<16 x i8> %a, <16 x i8> %b) {
; AVX256VL: # %bb.0:
; AVX256VL-NEXT: vpsllw $5, %xmm1, %xmm1
; AVX256VL-NEXT: vpsllw $4, %xmm0, %xmm2
-; AVX256VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX256VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm2, %xmm2
; AVX256VL-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX256VL-NEXT: vpsllw $2, %xmm0, %xmm2
-; AVX256VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX256VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm2, %xmm2
; AVX256VL-NEXT: vpaddb %xmm1, %xmm1, %xmm1
; AVX256VL-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX256VL-NEXT: vpaddb %xmm0, %xmm0, %xmm2
@@ -153,14 +153,14 @@ define <32 x i8> @var_lshr_v32i8(<32 x i8> %a, <32 x i8> %b) {
; AVX256: # %bb.0:
; AVX256-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX256-NEXT: vpsrlw $4, %ymm0, %ymm2
-; AVX256-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
+; AVX256-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm2
; AVX256-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX256-NEXT: vpsrlw $2, %ymm0, %ymm2
-; AVX256-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
+; AVX256-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm2
; AVX256-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX256-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX256-NEXT: vpsrlw $1, %ymm0, %ymm2
-; AVX256-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
+; AVX256-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm2
; AVX256-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX256-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX256-NEXT: retq
@@ -177,14 +177,14 @@ define <32 x i8> @var_lshr_v32i8(<32 x i8> %a, <32 x i8> %b) {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm2
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm2
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $2, %ymm0, %ymm2
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm2
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $1, %ymm0, %ymm2
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm2
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT: retq
@@ -259,14 +259,14 @@ define <16 x i8> @var_lshr_v16i8(<16 x i8> %a, <16 x i8> %b) {
; AVX256VL: # %bb.0:
; AVX256VL-NEXT: vpsllw $5, %xmm1, %xmm1
; AVX256VL-NEXT: vpsrlw $4, %xmm0, %xmm2
-; AVX256VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX256VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm2, %xmm2
; AVX256VL-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX256VL-NEXT: vpsrlw $2, %xmm0, %xmm2
-; AVX256VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX256VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm2, %xmm2
; AVX256VL-NEXT: vpaddb %xmm1, %xmm1, %xmm1
; AVX256VL-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX256VL-NEXT: vpsrlw $1, %xmm0, %xmm2
-; AVX256VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX256VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm2, %xmm2
; AVX256VL-NEXT: vpaddb %xmm1, %xmm1, %xmm1
; AVX256VL-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX256VL-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/prefer-avx256-trunc.ll b/llvm/test/CodeGen/X86/prefer-avx256-trunc.ll
index 3fbf694278df0..6ea480465a764 100644
--- a/llvm/test/CodeGen/X86/prefer-avx256-trunc.ll
+++ b/llvm/test/CodeGen/X86/prefer-avx256-trunc.ll
@@ -11,7 +11,7 @@
define <16 x i8> @testv16i16_trunc_v16i8(<16 x i16> %x) {
; AVX256NOBW-LABEL: testv16i16_trunc_v16i8:
; AVX256NOBW: # %bb.0:
-; AVX256NOBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX256NOBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; AVX256NOBW-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX256NOBW-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX256NOBW-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/prefer-avx256-wide-mul.ll b/llvm/test/CodeGen/X86/prefer-avx256-wide-mul.ll
index 3b811f40f1fdb..91d4aa6c91dbb 100644
--- a/llvm/test/CodeGen/X86/prefer-avx256-wide-mul.ll
+++ b/llvm/test/CodeGen/X86/prefer-avx256-wide-mul.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+prefer-256-bit | FileCheck %s --check-prefix=AVX256BW
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,-prefer-256-bit | FileCheck %s --check-prefix=AVX512BW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,-prefer-256-bit | FileCheck %s --check-prefix=AVX512BWVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+prefer-256-bit | FileCheck %s --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,-prefer-256-bit | FileCheck %s --check-prefix=AVX512BW
@@ -18,12 +18,26 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) {
; AVX256BW-NEXT: vpackuswb %ymm2, %ymm1, %ymm1
; AVX256BW-NEXT: vpsubb %ymm1, %ymm0, %ymm0
; AVX256BW-NEXT: vpsrlw $1, %ymm0, %ymm0
-; AVX256BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX256BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; AVX256BW-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; AVX256BW-NEXT: vpsrlw $2, %ymm0, %ymm0
-; AVX256BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX256BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; AVX256BW-NEXT: retq
;
+; AVX512BWVL-LABEL: test_div7_32i8:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512BWVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512BWVL-NEXT: vpsrlw $8, %zmm1, %zmm1
+; AVX512BWVL-NEXT: vpmovwb %zmm1, %ymm1
+; AVX512BWVL-NEXT: vpsubb %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpsrlw $1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpaddb %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpsrlw $2, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
+; AVX512BWVL-NEXT: retq
+;
; AVX512BW-LABEL: test_div7_32i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
@@ -56,6 +70,14 @@ define <32 x i8> @test_mul_32i8(<32 x i8> %a, <32 x i8> %b) {
; AVX256BW-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
; AVX256BW-NEXT: retq
;
+; AVX512BWVL-LABEL: test_mul_32i8:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512BWVL-NEXT: vpmullw %zmm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BWVL-NEXT: retq
+;
; AVX512BW-LABEL: test_mul_32i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
diff --git a/llvm/test/CodeGen/X86/psubus.ll b/llvm/test/CodeGen/X86/psubus.ll
index 9b25707fcc8be..ac0037ab15003 100644
--- a/llvm/test/CodeGen/X86/psubus.ll
+++ b/llvm/test/CodeGen/X86/psubus.ll
@@ -76,15 +76,35 @@ define <16 x i8> @ashr_xor_and_commute_uses(<16 x i8> %x, ptr %p1, ptr %p2) noun
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: ashr_xor_and_commute_uses:
-; AVX: # %bb.0:
-; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm1
-; AVX-NEXT: vmovdqa %xmm1, (%rdi)
-; AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: vmovdqa %xmm0, (%rsi)
-; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: ashr_xor_and_commute_uses:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm1
+; AVX1-NEXT: vmovdqa %xmm1, (%rdi)
+; AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vmovdqa %xmm0, (%rsi)
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: ashr_xor_and_commute_uses:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm1
+; AVX2-NEXT: vmovdqa %xmm1, (%rdi)
+; AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vmovdqa %xmm0, (%rsi)
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: ashr_xor_and_commute_uses:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm1
+; AVX512-NEXT: vmovdqa %xmm1, (%rdi)
+; AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa %xmm0, (%rsi)
+; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
%signsplat = ashr <16 x i8> %x, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
store <16 x i8> %signsplat, ptr %p1
%flipsign = xor <16 x i8> %x, <i8 undef, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>
diff --git a/llvm/test/CodeGen/X86/rotate-extract-vector.ll b/llvm/test/CodeGen/X86/rotate-extract-vector.ll
index 69c1f4a53286c..58d9d7e6952f3 100644
--- a/llvm/test/CodeGen/X86/rotate-extract-vector.ll
+++ b/llvm/test/CodeGen/X86/rotate-extract-vector.ll
@@ -105,14 +105,14 @@ define <4 x i32> @vrolw_extract_mul_with_mask(<4 x i32> %i) nounwind {
; X86: # %bb.0:
; X86-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; X86-NEXT: vprold $7, %xmm0, %xmm0
-; X86-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: vrolw_extract_mul_with_mask:
; X64: # %bb.0:
; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-NEXT: vprold $7, %xmm0, %xmm0
-; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-NEXT: retq
%lhs_mul = mul <4 x i32> %i, <i32 1152, i32 1152, i32 1152, i32 1152>
%rhs_mul = mul <4 x i32> %i, <i32 9, i32 9, i32 9, i32 9>
@@ -151,7 +151,7 @@ define <4 x i64> @no_extract_shl(<4 x i64> %i) nounwind {
; X86: # %bb.0:
; X86-NEXT: vpsllq $24, %ymm0, %ymm1
; X86-NEXT: vpsrlq $39, %ymm0, %ymm0
-; X86-NEXT: vpternlogq $236, {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm0
+; X86-NEXT: vpternlogq $236, {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %ymm1, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: no_extract_shl:
diff --git a/llvm/test/CodeGen/X86/rotate_vec.ll b/llvm/test/CodeGen/X86/rotate_vec.ll
index ae74e9e6e042b..11d62c307a1dd 100644
--- a/llvm/test/CodeGen/X86/rotate_vec.ll
+++ b/llvm/test/CodeGen/X86/rotate_vec.ll
@@ -45,7 +45,7 @@ define <4 x i32> @rot_v4i32_splat_2masks(<4 x i32> %x) {
; AVX512-LABEL: rot_v4i32_splat_2masks:
; AVX512: # %bb.0:
; AVX512-NEXT: vprold $31, %xmm0, %xmm0
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
%2 = and <4 x i32> %1, <i32 4294901760, i32 4294901760, i32 4294901760, i32 4294901760>
@@ -123,7 +123,7 @@ define <4 x i32> @rot_v4i32_mask_ashr0(<4 x i32> %a0) {
; AVX512-LABEL: rot_v4i32_mask_ashr0:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = ashr <4 x i32> %a0, <i32 25, i32 26, i32 27, i32 28>
%2 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %1, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
@@ -151,7 +151,7 @@ define <4 x i32> @rot_v4i32_mask_ashr1(<4 x i32> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrad $25, %xmm0, %xmm0
; AVX512-NEXT: vpbroadcastd %xmm0, %xmm0
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = ashr <4 x i32> %a0, <i32 25, i32 26, i32 27, i32 28>
%2 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %1, <4 x i32> %1, <4 x i32> <i32 1, i32 2, i32 3, i32 4>)
diff --git a/llvm/test/CodeGen/X86/sadd_sat_vec.ll b/llvm/test/CodeGen/X86/sadd_sat_vec.ll
index ab8a8d3bfc5bc..34eaec95e5ac3 100644
--- a/llvm/test/CodeGen/X86/sadd_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/sadd_sat_vec.ll
@@ -567,7 +567,7 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
; AVX512BW-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; AVX512BW-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; AVX512BW-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0
; AVX512BW-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: retq
%z = call <16 x i4> @llvm.sadd.sat.v16i4(<16 x i4> %x, <16 x i4> %y)
diff --git a/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll b/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll
index 59c64a3de71d0..f52132587c1df 100644
--- a/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll
+++ b/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll
@@ -2489,7 +2489,7 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) {
; CHECK-AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2
; CHECK-AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255]
; CHECK-AVX512VL-NEXT: vpandn %ymm3, %ymm2, %ymm2
-; CHECK-AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; CHECK-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; CHECK-AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
; CHECK-AVX512VL-NEXT: vpcmpgtb %ymm4, %ymm0, %ymm0
; CHECK-AVX512VL-NEXT: vpandn %ymm0, %ymm3, %ymm3
diff --git a/llvm/test/CodeGen/X86/ssub_sat_vec.ll b/llvm/test/CodeGen/X86/ssub_sat_vec.ll
index 3c87f3b06a77b..c8fd7e89c605f 100644
--- a/llvm/test/CodeGen/X86/ssub_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/ssub_sat_vec.ll
@@ -567,7 +567,7 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
; AVX512BW-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; AVX512BW-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; AVX512BW-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0
; AVX512BW-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: retq
%z = call <16 x i4> @llvm.ssub.sat.v16i4(<16 x i4> %x, <16 x i4> %y)
@@ -601,7 +601,7 @@ define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
;
; AVX512BW-LABEL: v16i1:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpternlogq $96, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; AVX512BW-NEXT: vpternlogd $96, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0
; AVX512BW-NEXT: retq
%z = call <16 x i1> @llvm.ssub.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
ret <16 x i1> %z
diff --git a/llvm/test/CodeGen/X86/usub_sat_vec.ll b/llvm/test/CodeGen/X86/usub_sat_vec.ll
index 5cdc516cb4337..a49f383e82631 100644
--- a/llvm/test/CodeGen/X86/usub_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/usub_sat_vec.ll
@@ -527,7 +527,7 @@ define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
;
; AVX512BW-LABEL: v16i1:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpternlogq $96, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; AVX512BW-NEXT: vpternlogd $96, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0
; AVX512BW-NEXT: retq
%z = call <16 x i1> @llvm.usub.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
ret <16 x i1> %z
diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-128-fp16.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-128-fp16.ll
index 7e10ab56faae1..ebdf12321b22f 100644
--- a/llvm/test/CodeGen/X86/vec-strict-inttofp-128-fp16.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-128-fp16.ll
@@ -73,13 +73,13 @@ define <8 x half> @sitofp_v8i1_v8f16(<8 x i1> %x) #0 {
define <8 x half> @uitofp_v8i1_v8f16(<8 x i1> %x) #0 {
; X86-LABEL: uitofp_v8i1_v8f16:
; X86: # %bb.0:
-; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; X86-NEXT: vcvtuw2ph %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: uitofp_v8i1_v8f16:
; X64: # %bb.0:
-; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-NEXT: vcvtuw2ph %xmm0, %xmm0
; X64-NEXT: retq
%result = call <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i1(<8 x i1> %x,
diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-256-fp16.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-256-fp16.ll
index cdb2d69e8b53f..93c340ebde76e 100644
--- a/llvm/test/CodeGen/X86/vec-strict-inttofp-256-fp16.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-256-fp16.ll
@@ -32,14 +32,14 @@ define <16 x half> @sitofp_v16i1_v16f16(<16 x i1> %x) #0 {
define <16 x half> @uitofp_v16i1_v16f16(<16 x i1> %x) #0 {
; X86-LABEL: uitofp_v16i1_v16f16:
; X86: # %bb.0:
-; X86-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; X86-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; X86-NEXT: vcvtuw2ph %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: uitofp_v16i1_v16f16:
; X64: # %bb.0:
-; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; X64-NEXT: vcvtuw2ph %ymm0, %ymm0
; X64-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
index 6f17a51d01546..a336d0a01fa7b 100644
--- a/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
@@ -142,14 +142,14 @@ define <8 x float> @uitofp_v8i1_v8f32(<8 x i1> %x) #0 {
;
; AVX512VL-32-LABEL: uitofp_v8i1_v8f32:
; AVX512VL-32: # %bb.0:
-; AVX512VL-32-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; AVX512VL-32-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; AVX512VL-32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VL-32-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512VL-32-NEXT: retl
;
; AVX512VL-64-LABEL: uitofp_v8i1_v8f32:
; AVX512VL-64: # %bb.0:
-; AVX512VL-64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VL-64-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VL-64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VL-64-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512VL-64-NEXT: retq
@@ -170,14 +170,14 @@ define <8 x float> @uitofp_v8i1_v8f32(<8 x i1> %x) #0 {
;
; AVX512DQVL-32-LABEL: uitofp_v8i1_v8f32:
; AVX512DQVL-32: # %bb.0:
-; AVX512DQVL-32-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; AVX512DQVL-32-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; AVX512DQVL-32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQVL-32-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512DQVL-32-NEXT: retl
;
; AVX512DQVL-64-LABEL: uitofp_v8i1_v8f32:
; AVX512DQVL-64: # %bb.0:
-; AVX512DQVL-64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512DQVL-64-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512DQVL-64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQVL-64-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512DQVL-64-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-512-fp16.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-512-fp16.ll
index c807af0932b56..5067a2e9c4212 100644
--- a/llvm/test/CodeGen/X86/vec-strict-inttofp-512-fp16.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-512-fp16.ll
@@ -30,14 +30,14 @@ define <32 x half> @sitofp_v32i1_v32f16(<32 x i1> %x) #0 {
define <32 x half> @uitofp_v32i1_v32f16(<32 x i1> %x) #0 {
; X86-LABEL: uitofp_v32i1_v32f16:
; X86: # %bb.0:
-; X86-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %ymm0
; X86-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; X86-NEXT: vcvtuw2ph %zmm0, %zmm0
; X86-NEXT: retl
;
; X64-LABEL: uitofp_v32i1_v32f16:
; X64: # %bb.0:
-; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; X64-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; X64-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; X64-NEXT: vcvtuw2ph %zmm0, %zmm0
; X64-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll
index 7c6199f30a756..caf4efbbf32c6 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll
@@ -536,7 +536,7 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %amt)
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VL-NEXT: vpslld $16, %ymm0, %ymm0
; AVX512VL-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm1
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm2, %xmm1
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX512VL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrld $16, %ymm0, %ymm0
@@ -805,7 +805,7 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %amt)
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; AVX512VL-NEXT: vpsrlw $1, %xmm1, %xmm1
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; AVX512VL-NEXT: vpsrlvd %zmm2, %zmm1, %zmm1
; AVX512VL-NEXT: vpord %zmm1, %zmm0, %zmm0
@@ -849,7 +849,7 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %amt)
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512VLBW-NEXT: vpsllw $8, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm1
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm2, %xmm1
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX512VLBW-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpsrlw $8, %ymm0, %ymm0
@@ -863,7 +863,7 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %amt)
; AVX512VLVBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,32,1,33,2,34,3,35,4,36,5,37,6,38,7,39,8,40,9,41,10,42,11,43,12,44,13,45,14,46,15,47]
; AVX512VLVBMI2-NEXT: vpermi2b %ymm0, %ymm1, %ymm3
-; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
+; AVX512VLVBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm2, %xmm0
; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512VLVBMI2-NEXT: vpsllvw %ymm0, %ymm3, %ymm0
; AVX512VLVBMI2-NEXT: vpsrlw $8, %ymm0, %ymm0
diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll
index 31121f10a83ab..adae44774b182 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll
@@ -385,7 +385,7 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT: vpslld $16, %zmm0, %zmm0
; AVX512VL-NEXT: vpord %zmm1, %zmm0, %zmm0
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm1
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm1
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512VL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: vpsrld $16, %zmm0, %zmm0
@@ -614,10 +614,10 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt)
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512VL-NEXT: vpand %ymm5, %ymm1, %ymm1
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm6
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm6, %ymm6
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm6, %ymm6
; AVX512VL-NEXT: vpblendvb %ymm4, %ymm6, %ymm1, %ymm1
; AVX512VL-NEXT: vpsrlw $2, %ymm1, %ymm6
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm6, %ymm6
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm6, %ymm6
; AVX512VL-NEXT: vpaddb %ymm4, %ymm4, %ymm4
; AVX512VL-NEXT: vpblendvb %ymm4, %ymm6, %ymm1, %ymm1
; AVX512VL-NEXT: vpsrlw $1, %ymm1, %ymm6
@@ -628,10 +628,10 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt)
; AVX512VL-NEXT: vpsllw $5, %ymm2, %ymm2
; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm3
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm4
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm4, %ymm4
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm2
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm2
; AVX512VL-NEXT: vpblendvb %ymm3, %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm2
; AVX512VL-NEXT: vpaddb %ymm3, %ymm3, %ymm3
@@ -671,7 +671,7 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt)
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512VLBW-NEXT: vpsllw $8, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm1
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm1
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpsrlw $8, %zmm0, %zmm0
@@ -684,7 +684,7 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt)
; AVX512VLVBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VLVBMI2-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,64,1,65,2,66,3,67,4,68,5,69,6,70,7,71,8,72,9,73,10,74,11,75,12,76,13,77,14,78,15,79,16,80,17,81,18,82,19,83,20,84,21,85,22,86,23,87,24,88,25,89,26,90,27,91,28,92,29,93,30,94,31,95]
; AVX512VLVBMI2-NEXT: vpermi2b %zmm0, %zmm1, %zmm3
-; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm0
+; AVX512VLVBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm0
; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512VLVBMI2-NEXT: vpsllvw %zmm0, %zmm3, %zmm0
; AVX512VLVBMI2-NEXT: vpsrlw $8, %zmm0, %zmm0
diff --git a/llvm/test/CodeGen/X86/vector-fshl-512.ll b/llvm/test/CodeGen/X86/vector-fshl-512.ll
index dc595801d74b5..bdec9cdf91a64 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-512.ll
@@ -138,7 +138,7 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> %
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT: vpslld $16, %zmm4, %zmm4
; AVX512F-NEXT: vpord %zmm3, %zmm4, %zmm3
-; AVX512F-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
+; AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512F-NEXT: vpsllvd %zmm4, %zmm3, %zmm3
; AVX512F-NEXT: vpsrld $16, %zmm3, %zmm3
@@ -163,7 +163,7 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> %
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT: vpslld $16, %zmm4, %zmm4
; AVX512VL-NEXT: vpord %zmm3, %zmm4, %zmm3
-; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512VL-NEXT: vpsllvd %zmm4, %zmm3, %zmm3
; AVX512VL-NEXT: vpsrld $16, %zmm3, %zmm3
@@ -227,7 +227,7 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt)
; AVX512F-NEXT: vpsrlw $4, %ymm5, %ymm3
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512F-NEXT: vpand %ymm6, %ymm3, %ymm7
-; AVX512F-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
+; AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm3
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm8 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512F-NEXT: vpxor %ymm3, %ymm8, %ymm9
@@ -296,7 +296,7 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt)
; AVX512VL-NEXT: vpsrlw $4, %ymm5, %ymm3
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vpand %ymm6, %ymm3, %ymm7
-; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2
; AVX512VL-NEXT: vextracti64x4 $1, %zmm2, %ymm3
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm8 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VL-NEXT: vpxor %ymm3, %ymm8, %ymm9
@@ -359,7 +359,7 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt)
; AVX512BW-LABEL: var_funnnel_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm1[8],zmm0[8],zmm1[9],zmm0[9],zmm1[10],zmm0[10],zmm1[11],zmm0[11],zmm1[12],zmm0[12],zmm1[13],zmm0[13],zmm1[14],zmm0[14],zmm1[15],zmm0[15],zmm1[24],zmm0[24],zmm1[25],zmm0[25],zmm1[26],zmm0[26],zmm1[27],zmm0[27],zmm1[28],zmm0[28],zmm1[29],zmm0[29],zmm1[30],zmm0[30],zmm1[31],zmm0[31],zmm1[40],zmm0[40],zmm1[41],zmm0[41],zmm1[42],zmm0[42],zmm1[43],zmm0[43],zmm1[44],zmm0[44],zmm1[45],zmm0[45],zmm1[46],zmm0[46],zmm1[47],zmm0[47],zmm1[56],zmm0[56],zmm1[57],zmm0[57],zmm1[58],zmm0[58],zmm1[59],zmm0[59],zmm1[60],zmm0[60],zmm1[61],zmm0[61],zmm1[62],zmm0[62],zmm1[63],zmm0[63]
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2
; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm5 = zmm2[8],zmm4[8],zmm2[9],zmm4[9],zmm2[10],zmm4[10],zmm2[11],zmm4[11],zmm2[12],zmm4[12],zmm2[13],zmm4[13],zmm2[14],zmm4[14],zmm2[15],zmm4[15],zmm2[24],zmm4[24],zmm2[25],zmm4[25],zmm2[26],zmm4[26],zmm2[27],zmm4[27],zmm2[28],zmm4[28],zmm2[29],zmm4[29],zmm2[30],zmm4[30],zmm2[31],zmm4[31],zmm2[40],zmm4[40],zmm2[41],zmm4[41],zmm2[42],zmm4[42],zmm2[43],zmm4[43],zmm2[44],zmm4[44],zmm2[45],zmm4[45],zmm2[46],zmm4[46],zmm2[47],zmm4[47],zmm2[56],zmm4[56],zmm2[57],zmm4[57],zmm2[58],zmm4[58],zmm2[59],zmm4[59],zmm2[60],zmm4[60],zmm2[61],zmm4[61],zmm2[62],zmm4[62],zmm2[63],zmm4[63]
; AVX512BW-NEXT: vpsllvw %zmm5, %zmm3, %zmm3
@@ -374,7 +374,7 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt)
; AVX512VBMI2-LABEL: var_funnnel_v64i8:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm1[8],zmm0[8],zmm1[9],zmm0[9],zmm1[10],zmm0[10],zmm1[11],zmm0[11],zmm1[12],zmm0[12],zmm1[13],zmm0[13],zmm1[14],zmm0[14],zmm1[15],zmm0[15],zmm1[24],zmm0[24],zmm1[25],zmm0[25],zmm1[26],zmm0[26],zmm1[27],zmm0[27],zmm1[28],zmm0[28],zmm1[29],zmm0[29],zmm1[30],zmm0[30],zmm1[31],zmm0[31],zmm1[40],zmm0[40],zmm1[41],zmm0[41],zmm1[42],zmm0[42],zmm1[43],zmm0[43],zmm1[44],zmm0[44],zmm1[45],zmm0[45],zmm1[46],zmm0[46],zmm1[47],zmm0[47],zmm1[56],zmm0[56],zmm1[57],zmm0[57],zmm1[58],zmm0[58],zmm1[59],zmm0[59],zmm1[60],zmm0[60],zmm1[61],zmm0[61],zmm1[62],zmm0[62],zmm1[63],zmm0[63]
-; AVX512VBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
+; AVX512VBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2
; AVX512VBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VBMI2-NEXT: vpunpckhbw {{.*#+}} zmm5 = zmm2[8],zmm4[8],zmm2[9],zmm4[9],zmm2[10],zmm4[10],zmm2[11],zmm4[11],zmm2[12],zmm4[12],zmm2[13],zmm4[13],zmm2[14],zmm4[14],zmm2[15],zmm4[15],zmm2[24],zmm4[24],zmm2[25],zmm4[25],zmm2[26],zmm4[26],zmm2[27],zmm4[27],zmm2[28],zmm4[28],zmm2[29],zmm4[29],zmm2[30],zmm4[30],zmm2[31],zmm4[31],zmm2[40],zmm4[40],zmm2[41],zmm4[41],zmm2[42],zmm4[42],zmm2[43],zmm4[43],zmm2[44],zmm4[44],zmm2[45],zmm4[45],zmm2[46],zmm4[46],zmm2[47],zmm4[47],zmm2[56],zmm4[56],zmm2[57],zmm4[57],zmm2[58],zmm4[58],zmm2[59],zmm4[59],zmm2[60],zmm4[60],zmm2[61],zmm4[61],zmm2[62],zmm4[62],zmm2[63],zmm4[63]
; AVX512VBMI2-NEXT: vpsllvw %zmm5, %zmm3, %zmm3
@@ -389,7 +389,7 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt)
; AVX512VLBW-LABEL: var_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm1[8],zmm0[8],zmm1[9],zmm0[9],zmm1[10],zmm0[10],zmm1[11],zmm0[11],zmm1[12],zmm0[12],zmm1[13],zmm0[13],zmm1[14],zmm0[14],zmm1[15],zmm0[15],zmm1[24],zmm0[24],zmm1[25],zmm0[25],zmm1[26],zmm0[26],zmm1[27],zmm0[27],zmm1[28],zmm0[28],zmm1[29],zmm0[29],zmm1[30],zmm0[30],zmm1[31],zmm0[31],zmm1[40],zmm0[40],zmm1[41],zmm0[41],zmm1[42],zmm0[42],zmm1[43],zmm0[43],zmm1[44],zmm0[44],zmm1[45],zmm0[45],zmm1[46],zmm0[46],zmm1[47],zmm0[47],zmm1[56],zmm0[56],zmm1[57],zmm0[57],zmm1[58],zmm0[58],zmm1[59],zmm0[59],zmm1[60],zmm0[60],zmm1[61],zmm0[61],zmm1[62],zmm0[62],zmm1[63],zmm0[63]
-; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2
; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} zmm5 = zmm2[8],zmm4[8],zmm2[9],zmm4[9],zmm2[10],zmm4[10],zmm2[11],zmm4[11],zmm2[12],zmm4[12],zmm2[13],zmm4[13],zmm2[14],zmm4[14],zmm2[15],zmm4[15],zmm2[24],zmm4[24],zmm2[25],zmm4[25],zmm2[26],zmm4[26],zmm2[27],zmm4[27],zmm2[28],zmm4[28],zmm2[29],zmm4[29],zmm2[30],zmm4[30],zmm2[31],zmm4[31],zmm2[40],zmm4[40],zmm2[41],zmm4[41],zmm2[42],zmm4[42],zmm2[43],zmm4[43],zmm2[44],zmm4[44],zmm2[45],zmm4[45],zmm2[46],zmm4[46],zmm2[47],zmm4[47],zmm2[56],zmm4[56],zmm2[57],zmm4[57],zmm2[58],zmm4[58],zmm2[59],zmm4[59],zmm2[60],zmm4[60],zmm2[61],zmm4[61],zmm2[62],zmm4[62],zmm2[63],zmm4[63]
; AVX512VLBW-NEXT: vpsllvw %zmm5, %zmm3, %zmm3
@@ -404,7 +404,7 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt)
; AVX512VLVBMI2-LABEL: var_funnnel_v64i8:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm1[8],zmm0[8],zmm1[9],zmm0[9],zmm1[10],zmm0[10],zmm1[11],zmm0[11],zmm1[12],zmm0[12],zmm1[13],zmm0[13],zmm1[14],zmm0[14],zmm1[15],zmm0[15],zmm1[24],zmm0[24],zmm1[25],zmm0[25],zmm1[26],zmm0[26],zmm1[27],zmm0[27],zmm1[28],zmm0[28],zmm1[29],zmm0[29],zmm1[30],zmm0[30],zmm1[31],zmm0[31],zmm1[40],zmm0[40],zmm1[41],zmm0[41],zmm1[42],zmm0[42],zmm1[43],zmm0[43],zmm1[44],zmm0[44],zmm1[45],zmm0[45],zmm1[46],zmm0[46],zmm1[47],zmm0[47],zmm1[56],zmm0[56],zmm1[57],zmm0[57],zmm1[58],zmm0[58],zmm1[59],zmm0[59],zmm1[60],zmm0[60],zmm1[61],zmm0[61],zmm1[62],zmm0[62],zmm1[63],zmm0[63]
-; AVX512VLVBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
+; AVX512VLVBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2
; AVX512VLVBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} zmm5 = zmm2[8],zmm4[8],zmm2[9],zmm4[9],zmm2[10],zmm4[10],zmm2[11],zmm4[11],zmm2[12],zmm4[12],zmm2[13],zmm4[13],zmm2[14],zmm4[14],zmm2[15],zmm4[15],zmm2[24],zmm4[24],zmm2[25],zmm4[25],zmm2[26],zmm4[26],zmm2[27],zmm4[27],zmm2[28],zmm4[28],zmm2[29],zmm4[29],zmm2[30],zmm4[30],zmm2[31],zmm4[31],zmm2[40],zmm4[40],zmm2[41],zmm4[41],zmm2[42],zmm4[42],zmm2[43],zmm4[43],zmm2[44],zmm4[44],zmm2[45],zmm4[45],zmm2[46],zmm4[46],zmm2[47],zmm4[47],zmm2[56],zmm4[56],zmm2[57],zmm4[57],zmm2[58],zmm4[58],zmm2[59],zmm4[59],zmm2[60],zmm4[60],zmm2[61],zmm4[61],zmm2[62],zmm4[62],zmm2[63],zmm4[63]
; AVX512VLVBMI2-NEXT: vpsllvw %zmm5, %zmm3, %zmm3
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
index 18e07d19b6754..74b7fa84aac12 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
@@ -396,7 +396,7 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind {
;
; AVX512VL-LABEL: var_funnnel_v8i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VL-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; AVX512VL-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm0[4,4,5,5,6,6,7,7]
@@ -423,7 +423,7 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind {
;
; AVX512VLBW-LABEL: var_funnnel_v8i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpsllvw %xmm1, %xmm0, %xmm2
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
@@ -598,7 +598,7 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind {
; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VL-NEXT: vpslld $8, %zmm0, %zmm2
; AVX512VL-NEXT: vpord %zmm2, %zmm0, %zmm0
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; AVX512VL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: vpsrld $8, %zmm0, %zmm0
@@ -624,7 +624,7 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind {
;
; AVX512VLBW-LABEL: var_funnnel_v16i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
@@ -655,7 +655,7 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind {
;
; AVX512VLVBMI2-LABEL: var_funnnel_v16i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VLVBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512VLVBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
index 8f2c6a3c024fd..fc0804b7c92e2 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
@@ -300,7 +300,7 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind {
;
; AVX512VL-LABEL: var_funnnel_v16i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VL-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; AVX512VL-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
@@ -326,7 +326,7 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind {
;
; AVX512VLBW-LABEL: var_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpsllvw %ymm1, %ymm0, %ymm2
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT: vpsubw %ymm1, %ymm3, %ymm1
@@ -477,7 +477,7 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind {
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm2
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm3
-; AVX512VL-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3
+; AVX512VL-NEXT: vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm3
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: retq
@@ -499,7 +499,7 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind {
;
; AVX512VLBW-LABEL: var_funnnel_v32i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31]
; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
@@ -529,7 +529,7 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind {
;
; AVX512VLVBMI2-LABEL: var_funnnel_v32i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX512VLVBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
; AVX512VLVBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31]
; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
index 9e1f19f00dc70..daf4af02727cb 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
@@ -94,7 +94,7 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind {
;
; AVX512BW-LABEL: var_funnnel_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512BW-NEXT: vpsubw %zmm1, %zmm3, %zmm1
@@ -104,7 +104,7 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind {
;
; AVX512VLBW-LABEL: var_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2
; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT: vpsubw %zmm1, %zmm3, %zmm1
@@ -210,7 +210,7 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind {
;
; AVX512BW-LABEL: var_funnnel_v64i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31],zmm1[40],zmm2[40],zmm1[41],zmm2[41],zmm1[42],zmm2[42],zmm1[43],zmm2[43],zmm1[44],zmm2[44],zmm1[45],zmm2[45],zmm1[46],zmm2[46],zmm1[47],zmm2[47],zmm1[56],zmm2[56],zmm1[57],zmm2[57],zmm1[58],zmm2[58],zmm1[59],zmm2[59],zmm1[60],zmm2[60],zmm1[61],zmm2[61],zmm1[62],zmm2[62],zmm1[63],zmm2[63]
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm4 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
@@ -225,7 +225,7 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind {
;
; AVX512VLBW-LABEL: var_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31],zmm1[40],zmm2[40],zmm1[41],zmm2[41],zmm1[42],zmm2[42],zmm1[43],zmm2[43],zmm1[44],zmm2[44],zmm1[45],zmm2[45],zmm1[46],zmm2[46],zmm1[47],zmm2[47],zmm1[56],zmm2[56],zmm1[57],zmm2[57],zmm1[58],zmm2[58],zmm1[59],zmm2[59],zmm1[60],zmm2[60],zmm1[61],zmm2[61],zmm1[62],zmm2[62],zmm1[63],zmm2[63]
; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} zmm4 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
@@ -240,7 +240,7 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind {
;
; AVX512VBMI2-LABEL: var_funnnel_v64i8:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512VBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512VBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VBMI2-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31],zmm1[40],zmm2[40],zmm1[41],zmm2[41],zmm1[42],zmm2[42],zmm1[43],zmm2[43],zmm1[44],zmm2[44],zmm1[45],zmm2[45],zmm1[46],zmm2[46],zmm1[47],zmm2[47],zmm1[56],zmm2[56],zmm1[57],zmm2[57],zmm1[58],zmm2[58],zmm1[59],zmm2[59],zmm1[60],zmm2[60],zmm1[61],zmm2[61],zmm1[62],zmm2[62],zmm1[63],zmm2[63]
; AVX512VBMI2-NEXT: vpunpckhbw {{.*#+}} zmm4 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
@@ -255,7 +255,7 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind {
;
; AVX512VLVBMI2-LABEL: var_funnnel_v64i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512VLVBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512VLVBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31],zmm1[40],zmm2[40],zmm1[41],zmm2[41],zmm1[42],zmm2[42],zmm1[43],zmm2[43],zmm1[44],zmm2[44],zmm1[45],zmm2[45],zmm1[46],zmm2[46],zmm1[47],zmm2[47],zmm1[56],zmm2[56],zmm1[57],zmm2[57],zmm1[58],zmm2[58],zmm1[59],zmm2[59],zmm1[60],zmm2[60],zmm1[61],zmm2[61],zmm1[62],zmm2[62],zmm1[63],zmm2[63]
; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} zmm4 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll
index 76cd08d375f32..924de00641efb 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll
@@ -594,7 +594,7 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %amt)
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VL-NEXT: vpslld $16, %ymm0, %ymm0
; AVX512VL-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm1
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm2, %xmm1
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX512VL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
@@ -925,7 +925,7 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %amt)
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512VLBW-NEXT: vpsllw $8, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm1
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm2, %xmm1
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX512VLBW-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpmovwb %ymm0, %xmm0
@@ -938,7 +938,7 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %amt)
; AVX512VLVBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,32,1,33,2,34,3,35,4,36,5,37,6,38,7,39,8,40,9,41,10,42,11,43,12,44,13,45,14,46,15,47]
; AVX512VLVBMI2-NEXT: vpermi2b %ymm0, %ymm1, %ymm3
-; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
+; AVX512VLVBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm2, %xmm0
; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512VLVBMI2-NEXT: vpsrlvw %ymm0, %ymm3, %ymm0
; AVX512VLVBMI2-NEXT: vpmovwb %ymm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll
index afff0546203e4..9b230ccefd3c8 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll
@@ -414,7 +414,7 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT: vpslld $16, %zmm0, %zmm0
; AVX512VL-NEXT: vpord %zmm1, %zmm0, %zmm0
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm1
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm1
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512VL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0
@@ -645,13 +645,13 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt)
; AVX512VL-NEXT: vpsllw $5, %ymm4, %ymm4
; AVX512VL-NEXT: vpaddb %ymm4, %ymm4, %ymm5
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm6
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm6, %ymm6
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm6, %ymm6
; AVX512VL-NEXT: vpblendvb %ymm4, %ymm6, %ymm1, %ymm1
; AVX512VL-NEXT: vpsrlw $2, %ymm1, %ymm4
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm4, %ymm4
; AVX512VL-NEXT: vpblendvb %ymm5, %ymm4, %ymm1, %ymm1
; AVX512VL-NEXT: vpsrlw $1, %ymm1, %ymm4
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm4, %ymm4
; AVX512VL-NEXT: vpaddb %ymm5, %ymm5, %ymm5
; AVX512VL-NEXT: vpblendvb %ymm5, %ymm4, %ymm1, %ymm1
; AVX512VL-NEXT: vpandn %ymm3, %ymm2, %ymm2
@@ -659,10 +659,10 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt)
; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm3
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm4
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm4, %ymm4
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm2
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm2
; AVX512VL-NEXT: vpblendvb %ymm3, %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm2
; AVX512VL-NEXT: vpaddb %ymm3, %ymm3, %ymm3
@@ -700,7 +700,7 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt)
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512VLBW-NEXT: vpsllw $8, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm1
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm1
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
; AVX512VLBW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpmovwb %zmm0, %ymm0
@@ -712,7 +712,7 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt)
; AVX512VLVBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VLVBMI2-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,64,1,65,2,66,3,67,4,68,5,69,6,70,7,71,8,72,9,73,10,74,11,75,12,76,13,77,14,78,15,79,16,80,17,81,18,82,19,83,20,84,21,85,22,86,23,87,24,88,25,89,26,90,27,91,28,92,29,93,30,94,31,95]
; AVX512VLVBMI2-NEXT: vpermi2b %zmm0, %zmm1, %zmm3
-; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm0
+; AVX512VLVBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm0
; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512VLVBMI2-NEXT: vpsrlvw %zmm0, %zmm3, %zmm0
; AVX512VLVBMI2-NEXT: vpmovwb %zmm0, %ymm0
diff --git a/llvm/test/CodeGen/X86/vector-fshr-512.ll b/llvm/test/CodeGen/X86/vector-fshr-512.ll
index 38998b8ab295f..0bbc09dc11bd2 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-512.ll
@@ -142,7 +142,7 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> %
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT: vpslld $16, %zmm4, %zmm4
; AVX512F-NEXT: vpord %zmm3, %zmm4, %zmm3
-; AVX512F-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
+; AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512F-NEXT: vpsrlvd %zmm4, %zmm3, %zmm3
; AVX512F-NEXT: vpmovdw %zmm3, %ymm3
@@ -165,7 +165,7 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> %
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT: vpslld $16, %zmm4, %zmm4
; AVX512VL-NEXT: vpord %zmm3, %zmm4, %zmm3
-; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512VL-NEXT: vpsrlvd %zmm4, %zmm3, %zmm3
; AVX512VL-NEXT: vpmovdw %zmm3, %ymm3
@@ -227,7 +227,7 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt)
; AVX512F-NEXT: vpsllw $4, %ymm4, %ymm3
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpand %ymm5, %ymm3, %ymm6
-; AVX512F-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
+; AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm3
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm7 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512F-NEXT: vpxor %ymm7, %ymm3, %ymm8
@@ -294,7 +294,7 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt)
; AVX512VL-NEXT: vpsllw $4, %ymm4, %ymm3
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512VL-NEXT: vpand %ymm5, %ymm3, %ymm6
-; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2
; AVX512VL-NEXT: vextracti64x4 $1, %zmm2, %ymm3
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm7 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512VL-NEXT: vpxor %ymm7, %ymm3, %ymm8
@@ -357,7 +357,7 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt)
; AVX512BW-LABEL: var_funnnel_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm1[8],zmm0[8],zmm1[9],zmm0[9],zmm1[10],zmm0[10],zmm1[11],zmm0[11],zmm1[12],zmm0[12],zmm1[13],zmm0[13],zmm1[14],zmm0[14],zmm1[15],zmm0[15],zmm1[24],zmm0[24],zmm1[25],zmm0[25],zmm1[26],zmm0[26],zmm1[27],zmm0[27],zmm1[28],zmm0[28],zmm1[29],zmm0[29],zmm1[30],zmm0[30],zmm1[31],zmm0[31],zmm1[40],zmm0[40],zmm1[41],zmm0[41],zmm1[42],zmm0[42],zmm1[43],zmm0[43],zmm1[44],zmm0[44],zmm1[45],zmm0[45],zmm1[46],zmm0[46],zmm1[47],zmm0[47],zmm1[56],zmm0[56],zmm1[57],zmm0[57],zmm1[58],zmm0[58],zmm1[59],zmm0[59],zmm1[60],zmm0[60],zmm1[61],zmm0[61],zmm1[62],zmm0[62],zmm1[63],zmm0[63]
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2
; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm5 = zmm2[8],zmm4[8],zmm2[9],zmm4[9],zmm2[10],zmm4[10],zmm2[11],zmm4[11],zmm2[12],zmm4[12],zmm2[13],zmm4[13],zmm2[14],zmm4[14],zmm2[15],zmm4[15],zmm2[24],zmm4[24],zmm2[25],zmm4[25],zmm2[26],zmm4[26],zmm2[27],zmm4[27],zmm2[28],zmm4[28],zmm2[29],zmm4[29],zmm2[30],zmm4[30],zmm2[31],zmm4[31],zmm2[40],zmm4[40],zmm2[41],zmm4[41],zmm2[42],zmm4[42],zmm2[43],zmm4[43],zmm2[44],zmm4[44],zmm2[45],zmm4[45],zmm2[46],zmm4[46],zmm2[47],zmm4[47],zmm2[56],zmm4[56],zmm2[57],zmm4[57],zmm2[58],zmm4[58],zmm2[59],zmm4[59],zmm2[60],zmm4[60],zmm2[61],zmm4[61],zmm2[62],zmm4[62],zmm2[63],zmm4[63]
; AVX512BW-NEXT: vpsrlvw %zmm5, %zmm3, %zmm3
@@ -373,7 +373,7 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt)
; AVX512VBMI2-LABEL: var_funnnel_v64i8:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm1[8],zmm0[8],zmm1[9],zmm0[9],zmm1[10],zmm0[10],zmm1[11],zmm0[11],zmm1[12],zmm0[12],zmm1[13],zmm0[13],zmm1[14],zmm0[14],zmm1[15],zmm0[15],zmm1[24],zmm0[24],zmm1[25],zmm0[25],zmm1[26],zmm0[26],zmm1[27],zmm0[27],zmm1[28],zmm0[28],zmm1[29],zmm0[29],zmm1[30],zmm0[30],zmm1[31],zmm0[31],zmm1[40],zmm0[40],zmm1[41],zmm0[41],zmm1[42],zmm0[42],zmm1[43],zmm0[43],zmm1[44],zmm0[44],zmm1[45],zmm0[45],zmm1[46],zmm0[46],zmm1[47],zmm0[47],zmm1[56],zmm0[56],zmm1[57],zmm0[57],zmm1[58],zmm0[58],zmm1[59],zmm0[59],zmm1[60],zmm0[60],zmm1[61],zmm0[61],zmm1[62],zmm0[62],zmm1[63],zmm0[63]
-; AVX512VBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
+; AVX512VBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2
; AVX512VBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VBMI2-NEXT: vpunpckhbw {{.*#+}} zmm5 = zmm2[8],zmm4[8],zmm2[9],zmm4[9],zmm2[10],zmm4[10],zmm2[11],zmm4[11],zmm2[12],zmm4[12],zmm2[13],zmm4[13],zmm2[14],zmm4[14],zmm2[15],zmm4[15],zmm2[24],zmm4[24],zmm2[25],zmm4[25],zmm2[26],zmm4[26],zmm2[27],zmm4[27],zmm2[28],zmm4[28],zmm2[29],zmm4[29],zmm2[30],zmm4[30],zmm2[31],zmm4[31],zmm2[40],zmm4[40],zmm2[41],zmm4[41],zmm2[42],zmm4[42],zmm2[43],zmm4[43],zmm2[44],zmm4[44],zmm2[45],zmm4[45],zmm2[46],zmm4[46],zmm2[47],zmm4[47],zmm2[56],zmm4[56],zmm2[57],zmm4[57],zmm2[58],zmm4[58],zmm2[59],zmm4[59],zmm2[60],zmm4[60],zmm2[61],zmm4[61],zmm2[62],zmm4[62],zmm2[63],zmm4[63]
; AVX512VBMI2-NEXT: vpsrlvw %zmm5, %zmm3, %zmm3
@@ -387,7 +387,7 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt)
; AVX512VLBW-LABEL: var_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm1[8],zmm0[8],zmm1[9],zmm0[9],zmm1[10],zmm0[10],zmm1[11],zmm0[11],zmm1[12],zmm0[12],zmm1[13],zmm0[13],zmm1[14],zmm0[14],zmm1[15],zmm0[15],zmm1[24],zmm0[24],zmm1[25],zmm0[25],zmm1[26],zmm0[26],zmm1[27],zmm0[27],zmm1[28],zmm0[28],zmm1[29],zmm0[29],zmm1[30],zmm0[30],zmm1[31],zmm0[31],zmm1[40],zmm0[40],zmm1[41],zmm0[41],zmm1[42],zmm0[42],zmm1[43],zmm0[43],zmm1[44],zmm0[44],zmm1[45],zmm0[45],zmm1[46],zmm0[46],zmm1[47],zmm0[47],zmm1[56],zmm0[56],zmm1[57],zmm0[57],zmm1[58],zmm0[58],zmm1[59],zmm0[59],zmm1[60],zmm0[60],zmm1[61],zmm0[61],zmm1[62],zmm0[62],zmm1[63],zmm0[63]
-; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2
; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} zmm5 = zmm2[8],zmm4[8],zmm2[9],zmm4[9],zmm2[10],zmm4[10],zmm2[11],zmm4[11],zmm2[12],zmm4[12],zmm2[13],zmm4[13],zmm2[14],zmm4[14],zmm2[15],zmm4[15],zmm2[24],zmm4[24],zmm2[25],zmm4[25],zmm2[26],zmm4[26],zmm2[27],zmm4[27],zmm2[28],zmm4[28],zmm2[29],zmm4[29],zmm2[30],zmm4[30],zmm2[31],zmm4[31],zmm2[40],zmm4[40],zmm2[41],zmm4[41],zmm2[42],zmm4[42],zmm2[43],zmm4[43],zmm2[44],zmm4[44],zmm2[45],zmm4[45],zmm2[46],zmm4[46],zmm2[47],zmm4[47],zmm2[56],zmm4[56],zmm2[57],zmm4[57],zmm2[58],zmm4[58],zmm2[59],zmm4[59],zmm2[60],zmm4[60],zmm2[61],zmm4[61],zmm2[62],zmm4[62],zmm2[63],zmm4[63]
; AVX512VLBW-NEXT: vpsrlvw %zmm5, %zmm3, %zmm3
@@ -403,7 +403,7 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt)
; AVX512VLVBMI2-LABEL: var_funnnel_v64i8:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm1[8],zmm0[8],zmm1[9],zmm0[9],zmm1[10],zmm0[10],zmm1[11],zmm0[11],zmm1[12],zmm0[12],zmm1[13],zmm0[13],zmm1[14],zmm0[14],zmm1[15],zmm0[15],zmm1[24],zmm0[24],zmm1[25],zmm0[25],zmm1[26],zmm0[26],zmm1[27],zmm0[27],zmm1[28],zmm0[28],zmm1[29],zmm0[29],zmm1[30],zmm0[30],zmm1[31],zmm0[31],zmm1[40],zmm0[40],zmm1[41],zmm0[41],zmm1[42],zmm0[42],zmm1[43],zmm0[43],zmm1[44],zmm0[44],zmm1[45],zmm0[45],zmm1[46],zmm0[46],zmm1[47],zmm0[47],zmm1[56],zmm0[56],zmm1[57],zmm0[57],zmm1[58],zmm0[58],zmm1[59],zmm0[59],zmm1[60],zmm0[60],zmm1[61],zmm0[61],zmm1[62],zmm0[62],zmm1[63],zmm0[63]
-; AVX512VLVBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
+; AVX512VLVBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2
; AVX512VLVBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} zmm5 = zmm2[8],zmm4[8],zmm2[9],zmm4[9],zmm2[10],zmm4[10],zmm2[11],zmm4[11],zmm2[12],zmm4[12],zmm2[13],zmm4[13],zmm2[14],zmm4[14],zmm2[15],zmm4[15],zmm2[24],zmm4[24],zmm2[25],zmm4[25],zmm2[26],zmm4[26],zmm2[27],zmm4[27],zmm2[28],zmm4[28],zmm2[29],zmm4[29],zmm2[30],zmm4[30],zmm2[31],zmm4[31],zmm2[40],zmm4[40],zmm2[41],zmm4[41],zmm2[42],zmm4[42],zmm2[43],zmm4[43],zmm2[44],zmm4[44],zmm2[45],zmm4[45],zmm2[46],zmm4[46],zmm2[47],zmm4[47],zmm2[56],zmm4[56],zmm2[57],zmm4[57],zmm2[58],zmm4[58],zmm2[59],zmm4[59],zmm2[60],zmm4[60],zmm2[61],zmm4[61],zmm2[62],zmm4[62],zmm2[63],zmm4[63]
; AVX512VLVBMI2-NEXT: vpsrlvw %zmm5, %zmm3, %zmm3
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
index 2dd9228c8ec42..04e4e66dd1b95 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
@@ -414,7 +414,7 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind {
;
; AVX512VL-LABEL: var_funnnel_v8i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VL-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; AVX512VL-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm0[4,4,5,5,6,6,7,7]
@@ -441,7 +441,7 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind {
;
; AVX512VLBW-LABEL: var_funnnel_v8i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpsrlvw %xmm1, %xmm0, %xmm2
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
@@ -621,7 +621,7 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind {
; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VL-NEXT: vpslld $8, %zmm0, %zmm2
; AVX512VL-NEXT: vpord %zmm2, %zmm0, %zmm0
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; AVX512VL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
@@ -647,7 +647,7 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind {
;
; AVX512VLBW-LABEL: var_funnnel_v16i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
@@ -678,7 +678,7 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind {
;
; AVX512VLVBMI2-LABEL: var_funnnel_v16i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VLVBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512VLVBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
index a7de4fa4d9e36..a62b2b70abdb1 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
@@ -316,7 +316,7 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind {
;
; AVX512VL-LABEL: var_funnnel_v16i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VL-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; AVX512VL-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
@@ -342,7 +342,7 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind {
;
; AVX512VLBW-LABEL: var_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpsrlvw %ymm1, %ymm0, %ymm2
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT: vpsubw %ymm1, %ymm3, %ymm1
@@ -525,7 +525,7 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind {
;
; AVX512VLBW-LABEL: var_funnnel_v32i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31]
; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
@@ -556,7 +556,7 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind {
;
; AVX512VLVBMI2-LABEL: var_funnnel_v32i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX512VLVBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
; AVX512VLVBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31]
; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
index 1d91b02b45623..e673b1678baea 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
@@ -94,7 +94,7 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind {
;
; AVX512BW-LABEL: var_funnnel_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512BW-NEXT: vpsubw %zmm1, %zmm3, %zmm1
@@ -104,7 +104,7 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind {
;
; AVX512VLBW-LABEL: var_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2
; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT: vpsubw %zmm1, %zmm3, %zmm1
@@ -208,7 +208,7 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind {
;
; AVX512BW-LABEL: var_funnnel_v64i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31],zmm1[40],zmm2[40],zmm1[41],zmm2[41],zmm1[42],zmm2[42],zmm1[43],zmm2[43],zmm1[44],zmm2[44],zmm1[45],zmm2[45],zmm1[46],zmm2[46],zmm1[47],zmm2[47],zmm1[56],zmm2[56],zmm1[57],zmm2[57],zmm1[58],zmm2[58],zmm1[59],zmm2[59],zmm1[60],zmm2[60],zmm1[61],zmm2[61],zmm1[62],zmm2[62],zmm1[63],zmm2[63]
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm4 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
@@ -224,7 +224,7 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind {
;
; AVX512VLBW-LABEL: var_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31],zmm1[40],zmm2[40],zmm1[41],zmm2[41],zmm1[42],zmm2[42],zmm1[43],zmm2[43],zmm1[44],zmm2[44],zmm1[45],zmm2[45],zmm1[46],zmm2[46],zmm1[47],zmm2[47],zmm1[56],zmm2[56],zmm1[57],zmm2[57],zmm1[58],zmm2[58],zmm1[59],zmm2[59],zmm1[60],zmm2[60],zmm1[61],zmm2[61],zmm1[62],zmm2[62],zmm1[63],zmm2[63]
; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} zmm4 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
@@ -240,7 +240,7 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind {
;
; AVX512VBMI2-LABEL: var_funnnel_v64i8:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512VBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512VBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VBMI2-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31],zmm1[40],zmm2[40],zmm1[41],zmm2[41],zmm1[42],zmm2[42],zmm1[43],zmm2[43],zmm1[44],zmm2[44],zmm1[45],zmm2[45],zmm1[46],zmm2[46],zmm1[47],zmm2[47],zmm1[56],zmm2[56],zmm1[57],zmm2[57],zmm1[58],zmm2[58],zmm1[59],zmm2[59],zmm1[60],zmm2[60],zmm1[61],zmm2[61],zmm1[62],zmm2[62],zmm1[63],zmm2[63]
; AVX512VBMI2-NEXT: vpunpckhbw {{.*#+}} zmm4 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
@@ -254,7 +254,7 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind {
;
; AVX512VLVBMI2-LABEL: var_funnnel_v64i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512VLVBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512VLVBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31],zmm1[40],zmm2[40],zmm1[41],zmm2[41],zmm1[42],zmm2[42],zmm1[43],zmm2[43],zmm1[44],zmm2[44],zmm1[45],zmm2[45],zmm1[46],zmm2[46],zmm1[47],zmm2[47],zmm1[56],zmm2[56],zmm1[57],zmm2[57],zmm1[58],zmm2[58],zmm1[59],zmm2[59],zmm1[60],zmm2[60],zmm1[61],zmm2[61],zmm1[62],zmm2[62],zmm1[63],zmm2[63]
; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} zmm4 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll
index 7a9a468122f6b..04761b6d35c5f 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll
@@ -182,9 +182,9 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: vpsrlw $2, %zmm0, %zmm1
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
-; AVX512BW-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm1
+; AVX512BW-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm1
; AVX512BW-NEXT: vpsrlw $7, %zmm0, %zmm0
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: vpsubb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: retq
@@ -269,7 +269,7 @@ define <64 x i8> @test_divconstant_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
; AVX512BW-NEXT: vpackuswb %zmm1, %zmm2, %zmm1
; AVX512BW-NEXT: vpsrlw $7, %zmm0, %zmm0
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
%res = sdiv <64 x i8> %a, <i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7>
@@ -507,13 +507,13 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm1
; AVX512BW-NEXT: vpsrlw $2, %zmm1, %zmm2
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
-; AVX512BW-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm2
+; AVX512BW-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm3, %zmm2
; AVX512BW-NEXT: vpsrlw $7, %zmm1, %zmm1
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddb %zmm1, %zmm2, %zmm1
; AVX512BW-NEXT: vpsubb %zmm3, %zmm1, %zmm1
; AVX512BW-NEXT: vpsllw $3, %zmm1, %zmm2
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2
; AVX512BW-NEXT: vpsubb %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
@@ -615,7 +615,7 @@ define <64 x i8> @test_remconstant_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: vpsrlw $8, %zmm3, %zmm3
; AVX512BW-NEXT: vpackuswb %zmm2, %zmm3, %zmm2
; AVX512BW-NEXT: vpsrlw $7, %zmm1, %zmm1
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddb %zmm1, %zmm2, %zmm1
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
diff --git a/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll b/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll
index 1baac968cdc52..5169dd69f39fc 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll
@@ -162,7 +162,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $2, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
-; AVX512F-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: test_div7_64i8:
@@ -178,10 +178,10 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: vpackuswb %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrlw $2, %zmm0, %zmm0
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%res = udiv <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
ret <64 x i8> %res
@@ -524,12 +524,12 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: vpackuswb %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT: vpsrlw $1, %zmm2, %zmm2
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2
; AVX512BW-NEXT: vpaddb %zmm1, %zmm2, %zmm1
; AVX512BW-NEXT: vpsrlw $2, %zmm1, %zmm1
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512BW-NEXT: vpsllw $3, %zmm1, %zmm2
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2
; AVX512BW-NEXT: vpsubb %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-lzcnt-128.ll b/llvm/test/CodeGen/X86/vector-lzcnt-128.ll
index 31f88d06d37a7..122b478577fbf 100644
--- a/llvm/test/CodeGen/X86/vector-lzcnt-128.ll
+++ b/llvm/test/CodeGen/X86/vector-lzcnt-128.ll
@@ -3,10 +3,10 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefixes=SSE,SSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=NOBW,AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=NOBW,AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=NOBW,AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefix=AVX512VLBWDQ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=NOBW,AVX,AVX1OR2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=NOBW,AVX,AVX1OR2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=NOBW,AVX,AVX512VL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefixes=AVX512VLBWDQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512cd,+avx512vl | FileCheck %s --check-prefixes=NOBW,AVX512,AVX512VLCD
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512cd | FileCheck %s --check-prefixes=NOBW,AVX512,AVX512CD
;
@@ -158,40 +158,68 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
; SSE41-NEXT: paddq %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: testv2i64:
-; AVX: # %bb.0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
-; AVX-NEXT: vpshufb %xmm0, %xmm1, %xmm2
-; AVX-NEXT: vpsrlw $4, %xmm0, %xmm3
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
-; AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm5
-; AVX-NEXT: vpand %xmm5, %xmm2, %xmm2
-; AVX-NEXT: vpshufb %xmm3, %xmm1, %xmm1
-; AVX-NEXT: vpaddb %xmm1, %xmm2, %xmm1
-; AVX-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm2
-; AVX-NEXT: vpsrlw $8, %xmm2, %xmm2
-; AVX-NEXT: vpand %xmm2, %xmm1, %xmm2
-; AVX-NEXT: vpsrlw $8, %xmm1, %xmm1
-; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm2
-; AVX-NEXT: vpsrld $16, %xmm2, %xmm2
-; AVX-NEXT: vpand %xmm2, %xmm1, %xmm2
-; AVX-NEXT: vpsrld $16, %xmm1, %xmm1
-; AVX-NEXT: vpaddd %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpcmpeqd %xmm4, %xmm0, %xmm0
-; AVX-NEXT: vpsrlq $32, %xmm0, %xmm0
-; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vpsrlq $32, %xmm1, %xmm1
-; AVX-NEXT: vpaddq %xmm0, %xmm1, %xmm0
-; AVX-NEXT: retq
+; AVX1OR2-LABEL: testv2i64:
+; AVX1OR2: # %bb.0:
+; AVX1OR2-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
+; AVX1OR2-NEXT: vpshufb %xmm0, %xmm1, %xmm2
+; AVX1OR2-NEXT: vpsrlw $4, %xmm0, %xmm3
+; AVX1OR2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
+; AVX1OR2-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX1OR2-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm5
+; AVX1OR2-NEXT: vpand %xmm5, %xmm2, %xmm2
+; AVX1OR2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
+; AVX1OR2-NEXT: vpaddb %xmm1, %xmm2, %xmm1
+; AVX1OR2-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm2
+; AVX1OR2-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX1OR2-NEXT: vpand %xmm2, %xmm1, %xmm2
+; AVX1OR2-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX1OR2-NEXT: vpaddw %xmm2, %xmm1, %xmm1
+; AVX1OR2-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm2
+; AVX1OR2-NEXT: vpsrld $16, %xmm2, %xmm2
+; AVX1OR2-NEXT: vpand %xmm2, %xmm1, %xmm2
+; AVX1OR2-NEXT: vpsrld $16, %xmm1, %xmm1
+; AVX1OR2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; AVX1OR2-NEXT: vpcmpeqd %xmm4, %xmm0, %xmm0
+; AVX1OR2-NEXT: vpsrlq $32, %xmm0, %xmm0
+; AVX1OR2-NEXT: vpand %xmm0, %xmm1, %xmm0
+; AVX1OR2-NEXT: vpsrlq $32, %xmm1, %xmm1
+; AVX1OR2-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; AVX1OR2-NEXT: retq
+;
+; AVX512VL-LABEL: testv2i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
+; AVX512VL-NEXT: vpshufb %xmm0, %xmm1, %xmm2
+; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm3
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm3, %xmm3
+; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX512VL-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm5
+; AVX512VL-NEXT: vpand %xmm5, %xmm2, %xmm2
+; AVX512VL-NEXT: vpshufb %xmm3, %xmm1, %xmm1
+; AVX512VL-NEXT: vpaddb %xmm1, %xmm2, %xmm1
+; AVX512VL-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm2
+; AVX512VL-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm2
+; AVX512VL-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX512VL-NEXT: vpaddw %xmm2, %xmm1, %xmm1
+; AVX512VL-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm2
+; AVX512VL-NEXT: vpsrld $16, %xmm2, %xmm2
+; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm2
+; AVX512VL-NEXT: vpsrld $16, %xmm1, %xmm1
+; AVX512VL-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; AVX512VL-NEXT: vpcmpeqd %xmm4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsrlq $32, %xmm0, %xmm0
+; AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm0
+; AVX512VL-NEXT: vpsrlq $32, %xmm1, %xmm1
+; AVX512VL-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; AVX512VL-NEXT: retq
;
; AVX512VLBWDQ-LABEL: testv2i64:
; AVX512VLBWDQ: # %bb.0:
; AVX512VLBWDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VLBWDQ-NEXT: vpshufb %xmm0, %xmm1, %xmm2
; AVX512VLBWDQ-NEXT: vpsrlw $4, %xmm0, %xmm3
-; AVX512VLBWDQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
+; AVX512VLBWDQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm3, %xmm3
; AVX512VLBWDQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBWDQ-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm5
; AVX512VLBWDQ-NEXT: vpand %xmm5, %xmm2, %xmm2
@@ -408,40 +436,68 @@ define <2 x i64> @testv2i64u(<2 x i64> %in) nounwind {
; SSE41-NEXT: paddq %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: testv2i64u:
-; AVX: # %bb.0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
-; AVX-NEXT: vpshufb %xmm0, %xmm1, %xmm2
-; AVX-NEXT: vpsrlw $4, %xmm0, %xmm3
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
-; AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm5
-; AVX-NEXT: vpand %xmm5, %xmm2, %xmm2
-; AVX-NEXT: vpshufb %xmm3, %xmm1, %xmm1
-; AVX-NEXT: vpaddb %xmm1, %xmm2, %xmm1
-; AVX-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm2
-; AVX-NEXT: vpsrlw $8, %xmm2, %xmm2
-; AVX-NEXT: vpand %xmm2, %xmm1, %xmm2
-; AVX-NEXT: vpsrlw $8, %xmm1, %xmm1
-; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm2
-; AVX-NEXT: vpsrld $16, %xmm2, %xmm2
-; AVX-NEXT: vpand %xmm2, %xmm1, %xmm2
-; AVX-NEXT: vpsrld $16, %xmm1, %xmm1
-; AVX-NEXT: vpaddd %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpcmpeqd %xmm4, %xmm0, %xmm0
-; AVX-NEXT: vpsrlq $32, %xmm0, %xmm0
-; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vpsrlq $32, %xmm1, %xmm1
-; AVX-NEXT: vpaddq %xmm0, %xmm1, %xmm0
-; AVX-NEXT: retq
+; AVX1OR2-LABEL: testv2i64u:
+; AVX1OR2: # %bb.0:
+; AVX1OR2-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
+; AVX1OR2-NEXT: vpshufb %xmm0, %xmm1, %xmm2
+; AVX1OR2-NEXT: vpsrlw $4, %xmm0, %xmm3
+; AVX1OR2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
+; AVX1OR2-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX1OR2-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm5
+; AVX1OR2-NEXT: vpand %xmm5, %xmm2, %xmm2
+; AVX1OR2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
+; AVX1OR2-NEXT: vpaddb %xmm1, %xmm2, %xmm1
+; AVX1OR2-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm2
+; AVX1OR2-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX1OR2-NEXT: vpand %xmm2, %xmm1, %xmm2
+; AVX1OR2-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX1OR2-NEXT: vpaddw %xmm2, %xmm1, %xmm1
+; AVX1OR2-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm2
+; AVX1OR2-NEXT: vpsrld $16, %xmm2, %xmm2
+; AVX1OR2-NEXT: vpand %xmm2, %xmm1, %xmm2
+; AVX1OR2-NEXT: vpsrld $16, %xmm1, %xmm1
+; AVX1OR2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; AVX1OR2-NEXT: vpcmpeqd %xmm4, %xmm0, %xmm0
+; AVX1OR2-NEXT: vpsrlq $32, %xmm0, %xmm0
+; AVX1OR2-NEXT: vpand %xmm0, %xmm1, %xmm0
+; AVX1OR2-NEXT: vpsrlq $32, %xmm1, %xmm1
+; AVX1OR2-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; AVX1OR2-NEXT: retq
+;
+; AVX512VL-LABEL: testv2i64u:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
+; AVX512VL-NEXT: vpshufb %xmm0, %xmm1, %xmm2
+; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm3
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm3, %xmm3
+; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX512VL-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm5
+; AVX512VL-NEXT: vpand %xmm5, %xmm2, %xmm2
+; AVX512VL-NEXT: vpshufb %xmm3, %xmm1, %xmm1
+; AVX512VL-NEXT: vpaddb %xmm1, %xmm2, %xmm1
+; AVX512VL-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm2
+; AVX512VL-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm2
+; AVX512VL-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX512VL-NEXT: vpaddw %xmm2, %xmm1, %xmm1
+; AVX512VL-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm2
+; AVX512VL-NEXT: vpsrld $16, %xmm2, %xmm2
+; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm2
+; AVX512VL-NEXT: vpsrld $16, %xmm1, %xmm1
+; AVX512VL-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; AVX512VL-NEXT: vpcmpeqd %xmm4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsrlq $32, %xmm0, %xmm0
+; AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm0
+; AVX512VL-NEXT: vpsrlq $32, %xmm1, %xmm1
+; AVX512VL-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; AVX512VL-NEXT: retq
;
; AVX512VLBWDQ-LABEL: testv2i64u:
; AVX512VLBWDQ: # %bb.0:
; AVX512VLBWDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VLBWDQ-NEXT: vpshufb %xmm0, %xmm1, %xmm2
; AVX512VLBWDQ-NEXT: vpsrlw $4, %xmm0, %xmm3
-; AVX512VLBWDQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
+; AVX512VLBWDQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm3, %xmm3
; AVX512VLBWDQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBWDQ-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm5
; AVX512VLBWDQ-NEXT: vpand %xmm5, %xmm2, %xmm2
@@ -650,35 +706,58 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
; SSE41-NEXT: paddd %xmm3, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: testv4i32:
-; AVX: # %bb.0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
-; AVX-NEXT: vpshufb %xmm0, %xmm1, %xmm2
-; AVX-NEXT: vpsrlw $4, %xmm0, %xmm3
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
-; AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm5
-; AVX-NEXT: vpand %xmm5, %xmm2, %xmm2
-; AVX-NEXT: vpshufb %xmm3, %xmm1, %xmm1
-; AVX-NEXT: vpaddb %xmm1, %xmm2, %xmm1
-; AVX-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm2
-; AVX-NEXT: vpsrlw $8, %xmm2, %xmm2
-; AVX-NEXT: vpand %xmm2, %xmm1, %xmm2
-; AVX-NEXT: vpsrlw $8, %xmm1, %xmm1
-; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm0
-; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
-; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vpsrld $16, %xmm1, %xmm1
-; AVX-NEXT: vpaddd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: retq
+; AVX1OR2-LABEL: testv4i32:
+; AVX1OR2: # %bb.0:
+; AVX1OR2-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
+; AVX1OR2-NEXT: vpshufb %xmm0, %xmm1, %xmm2
+; AVX1OR2-NEXT: vpsrlw $4, %xmm0, %xmm3
+; AVX1OR2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
+; AVX1OR2-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX1OR2-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm5
+; AVX1OR2-NEXT: vpand %xmm5, %xmm2, %xmm2
+; AVX1OR2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
+; AVX1OR2-NEXT: vpaddb %xmm1, %xmm2, %xmm1
+; AVX1OR2-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm2
+; AVX1OR2-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX1OR2-NEXT: vpand %xmm2, %xmm1, %xmm2
+; AVX1OR2-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX1OR2-NEXT: vpaddw %xmm2, %xmm1, %xmm1
+; AVX1OR2-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm0
+; AVX1OR2-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX1OR2-NEXT: vpand %xmm0, %xmm1, %xmm0
+; AVX1OR2-NEXT: vpsrld $16, %xmm1, %xmm1
+; AVX1OR2-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; AVX1OR2-NEXT: retq
+;
+; AVX512VL-LABEL: testv4i32:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
+; AVX512VL-NEXT: vpshufb %xmm0, %xmm1, %xmm2
+; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm3
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm3, %xmm3
+; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX512VL-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm5
+; AVX512VL-NEXT: vpand %xmm5, %xmm2, %xmm2
+; AVX512VL-NEXT: vpshufb %xmm3, %xmm1, %xmm1
+; AVX512VL-NEXT: vpaddb %xmm1, %xmm2, %xmm1
+; AVX512VL-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm2
+; AVX512VL-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm2
+; AVX512VL-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX512VL-NEXT: vpaddw %xmm2, %xmm1, %xmm1
+; AVX512VL-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm0
+; AVX512VL-NEXT: vpsrld $16, %xmm1, %xmm1
+; AVX512VL-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; AVX512VL-NEXT: retq
;
; AVX512VLBWDQ-LABEL: testv4i32:
; AVX512VLBWDQ: # %bb.0:
; AVX512VLBWDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VLBWDQ-NEXT: vpshufb %xmm0, %xmm1, %xmm2
; AVX512VLBWDQ-NEXT: vpsrlw $4, %xmm0, %xmm3
-; AVX512VLBWDQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
+; AVX512VLBWDQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm3, %xmm3
; AVX512VLBWDQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBWDQ-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm5
; AVX512VLBWDQ-NEXT: vpand %xmm5, %xmm2, %xmm2
@@ -876,35 +955,58 @@ define <4 x i32> @testv4i32u(<4 x i32> %in) nounwind {
; SSE41-NEXT: paddd %xmm3, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: testv4i32u:
-; AVX: # %bb.0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
-; AVX-NEXT: vpshufb %xmm0, %xmm1, %xmm2
-; AVX-NEXT: vpsrlw $4, %xmm0, %xmm3
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
-; AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm5
-; AVX-NEXT: vpand %xmm5, %xmm2, %xmm2
-; AVX-NEXT: vpshufb %xmm3, %xmm1, %xmm1
-; AVX-NEXT: vpaddb %xmm1, %xmm2, %xmm1
-; AVX-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm2
-; AVX-NEXT: vpsrlw $8, %xmm2, %xmm2
-; AVX-NEXT: vpand %xmm2, %xmm1, %xmm2
-; AVX-NEXT: vpsrlw $8, %xmm1, %xmm1
-; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm0
-; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
-; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vpsrld $16, %xmm1, %xmm1
-; AVX-NEXT: vpaddd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: retq
+; AVX1OR2-LABEL: testv4i32u:
+; AVX1OR2: # %bb.0:
+; AVX1OR2-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
+; AVX1OR2-NEXT: vpshufb %xmm0, %xmm1, %xmm2
+; AVX1OR2-NEXT: vpsrlw $4, %xmm0, %xmm3
+; AVX1OR2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
+; AVX1OR2-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX1OR2-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm5
+; AVX1OR2-NEXT: vpand %xmm5, %xmm2, %xmm2
+; AVX1OR2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
+; AVX1OR2-NEXT: vpaddb %xmm1, %xmm2, %xmm1
+; AVX1OR2-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm2
+; AVX1OR2-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX1OR2-NEXT: vpand %xmm2, %xmm1, %xmm2
+; AVX1OR2-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX1OR2-NEXT: vpaddw %xmm2, %xmm1, %xmm1
+; AVX1OR2-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm0
+; AVX1OR2-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX1OR2-NEXT: vpand %xmm0, %xmm1, %xmm0
+; AVX1OR2-NEXT: vpsrld $16, %xmm1, %xmm1
+; AVX1OR2-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; AVX1OR2-NEXT: retq
+;
+; AVX512VL-LABEL: testv4i32u:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
+; AVX512VL-NEXT: vpshufb %xmm0, %xmm1, %xmm2
+; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm3
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm3, %xmm3
+; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX512VL-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm5
+; AVX512VL-NEXT: vpand %xmm5, %xmm2, %xmm2
+; AVX512VL-NEXT: vpshufb %xmm3, %xmm1, %xmm1
+; AVX512VL-NEXT: vpaddb %xmm1, %xmm2, %xmm1
+; AVX512VL-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm2
+; AVX512VL-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm2
+; AVX512VL-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX512VL-NEXT: vpaddw %xmm2, %xmm1, %xmm1
+; AVX512VL-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm0
+; AVX512VL-NEXT: vpsrld $16, %xmm1, %xmm1
+; AVX512VL-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; AVX512VL-NEXT: retq
;
; AVX512VLBWDQ-LABEL: testv4i32u:
; AVX512VLBWDQ: # %bb.0:
; AVX512VLBWDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VLBWDQ-NEXT: vpshufb %xmm0, %xmm1, %xmm2
; AVX512VLBWDQ-NEXT: vpsrlw $4, %xmm0, %xmm3
-; AVX512VLBWDQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
+; AVX512VLBWDQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm3, %xmm3
; AVX512VLBWDQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBWDQ-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm5
; AVX512VLBWDQ-NEXT: vpand %xmm5, %xmm2, %xmm2
@@ -1078,30 +1180,48 @@ define <8 x i16> @testv8i16(<8 x i16> %in) nounwind {
; SSE41-NEXT: paddw %xmm3, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: testv8i16:
-; AVX: # %bb.0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
-; AVX-NEXT: vpshufb %xmm0, %xmm1, %xmm2
-; AVX-NEXT: vpsrlw $4, %xmm0, %xmm3
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
-; AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm5
-; AVX-NEXT: vpand %xmm5, %xmm2, %xmm2
-; AVX-NEXT: vpshufb %xmm3, %xmm1, %xmm1
-; AVX-NEXT: vpaddb %xmm1, %xmm2, %xmm1
-; AVX-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm0
-; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0
-; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vpsrlw $8, %xmm1, %xmm1
-; AVX-NEXT: vpaddw %xmm0, %xmm1, %xmm0
-; AVX-NEXT: retq
+; AVX1OR2-LABEL: testv8i16:
+; AVX1OR2: # %bb.0:
+; AVX1OR2-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
+; AVX1OR2-NEXT: vpshufb %xmm0, %xmm1, %xmm2
+; AVX1OR2-NEXT: vpsrlw $4, %xmm0, %xmm3
+; AVX1OR2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
+; AVX1OR2-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX1OR2-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm5
+; AVX1OR2-NEXT: vpand %xmm5, %xmm2, %xmm2
+; AVX1OR2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
+; AVX1OR2-NEXT: vpaddb %xmm1, %xmm2, %xmm1
+; AVX1OR2-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm0
+; AVX1OR2-NEXT: vpsrlw $8, %xmm0, %xmm0
+; AVX1OR2-NEXT: vpand %xmm0, %xmm1, %xmm0
+; AVX1OR2-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX1OR2-NEXT: vpaddw %xmm0, %xmm1, %xmm0
+; AVX1OR2-NEXT: retq
+;
+; AVX512VL-LABEL: testv8i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
+; AVX512VL-NEXT: vpshufb %xmm0, %xmm1, %xmm2
+; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm3
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm3, %xmm3
+; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX512VL-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm5
+; AVX512VL-NEXT: vpand %xmm5, %xmm2, %xmm2
+; AVX512VL-NEXT: vpshufb %xmm3, %xmm1, %xmm1
+; AVX512VL-NEXT: vpaddb %xmm1, %xmm2, %xmm1
+; AVX512VL-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0
+; AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm0
+; AVX512VL-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX512VL-NEXT: vpaddw %xmm0, %xmm1, %xmm0
+; AVX512VL-NEXT: retq
;
; AVX512VLBWDQ-LABEL: testv8i16:
; AVX512VLBWDQ: # %bb.0:
; AVX512VLBWDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VLBWDQ-NEXT: vpshufb %xmm0, %xmm1, %xmm2
; AVX512VLBWDQ-NEXT: vpsrlw $4, %xmm0, %xmm3
-; AVX512VLBWDQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
+; AVX512VLBWDQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm3, %xmm3
; AVX512VLBWDQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBWDQ-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm5
; AVX512VLBWDQ-NEXT: vpand %xmm5, %xmm2, %xmm2
@@ -1268,30 +1388,48 @@ define <8 x i16> @testv8i16u(<8 x i16> %in) nounwind {
; SSE41-NEXT: paddw %xmm3, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: testv8i16u:
-; AVX: # %bb.0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
-; AVX-NEXT: vpshufb %xmm0, %xmm1, %xmm2
-; AVX-NEXT: vpsrlw $4, %xmm0, %xmm3
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
-; AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm5
-; AVX-NEXT: vpand %xmm5, %xmm2, %xmm2
-; AVX-NEXT: vpshufb %xmm3, %xmm1, %xmm1
-; AVX-NEXT: vpaddb %xmm1, %xmm2, %xmm1
-; AVX-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm0
-; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0
-; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vpsrlw $8, %xmm1, %xmm1
-; AVX-NEXT: vpaddw %xmm0, %xmm1, %xmm0
-; AVX-NEXT: retq
+; AVX1OR2-LABEL: testv8i16u:
+; AVX1OR2: # %bb.0:
+; AVX1OR2-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
+; AVX1OR2-NEXT: vpshufb %xmm0, %xmm1, %xmm2
+; AVX1OR2-NEXT: vpsrlw $4, %xmm0, %xmm3
+; AVX1OR2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
+; AVX1OR2-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX1OR2-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm5
+; AVX1OR2-NEXT: vpand %xmm5, %xmm2, %xmm2
+; AVX1OR2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
+; AVX1OR2-NEXT: vpaddb %xmm1, %xmm2, %xmm1
+; AVX1OR2-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm0
+; AVX1OR2-NEXT: vpsrlw $8, %xmm0, %xmm0
+; AVX1OR2-NEXT: vpand %xmm0, %xmm1, %xmm0
+; AVX1OR2-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX1OR2-NEXT: vpaddw %xmm0, %xmm1, %xmm0
+; AVX1OR2-NEXT: retq
+;
+; AVX512VL-LABEL: testv8i16u:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
+; AVX512VL-NEXT: vpshufb %xmm0, %xmm1, %xmm2
+; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm3
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm3, %xmm3
+; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX512VL-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm5
+; AVX512VL-NEXT: vpand %xmm5, %xmm2, %xmm2
+; AVX512VL-NEXT: vpshufb %xmm3, %xmm1, %xmm1
+; AVX512VL-NEXT: vpaddb %xmm1, %xmm2, %xmm1
+; AVX512VL-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0
+; AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm0
+; AVX512VL-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX512VL-NEXT: vpaddw %xmm0, %xmm1, %xmm0
+; AVX512VL-NEXT: retq
;
; AVX512VLBWDQ-LABEL: testv8i16u:
; AVX512VLBWDQ: # %bb.0:
; AVX512VLBWDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VLBWDQ-NEXT: vpshufb %xmm0, %xmm1, %xmm2
; AVX512VLBWDQ-NEXT: vpsrlw $4, %xmm0, %xmm3
-; AVX512VLBWDQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
+; AVX512VLBWDQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm3, %xmm3
; AVX512VLBWDQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBWDQ-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm5
; AVX512VLBWDQ-NEXT: vpand %xmm5, %xmm2, %xmm2
@@ -1442,25 +1580,38 @@ define <16 x i8> @testv16i8(<16 x i8> %in) nounwind {
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: testv16i8:
-; AVX: # %bb.0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
-; AVX-NEXT: vpshufb %xmm0, %xmm1, %xmm2
-; AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm3
-; AVX-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX-NEXT: vpshufb %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vpaddb %xmm0, %xmm2, %xmm0
-; AVX-NEXT: retq
+; AVX1OR2-LABEL: testv16i8:
+; AVX1OR2: # %bb.0:
+; AVX1OR2-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
+; AVX1OR2-NEXT: vpshufb %xmm0, %xmm1, %xmm2
+; AVX1OR2-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX1OR2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1OR2-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1OR2-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm3
+; AVX1OR2-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX1OR2-NEXT: vpshufb %xmm0, %xmm1, %xmm0
+; AVX1OR2-NEXT: vpaddb %xmm0, %xmm2, %xmm0
+; AVX1OR2-NEXT: retq
+;
+; AVX512VL-LABEL: testv16i8:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
+; AVX512VL-NEXT: vpshufb %xmm0, %xmm1, %xmm2
+; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX512VL-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm3
+; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX512VL-NEXT: vpshufb %xmm0, %xmm1, %xmm0
+; AVX512VL-NEXT: vpaddb %xmm0, %xmm2, %xmm0
+; AVX512VL-NEXT: retq
;
; AVX512VLBWDQ-LABEL: testv16i8:
; AVX512VLBWDQ: # %bb.0:
; AVX512VLBWDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VLBWDQ-NEXT: vpshufb %xmm0, %xmm1, %xmm2
; AVX512VLBWDQ-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX512VLBWDQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VLBWDQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VLBWDQ-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VLBWDQ-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm3
; AVX512VLBWDQ-NEXT: vpand %xmm3, %xmm2, %xmm2
@@ -1592,25 +1743,38 @@ define <16 x i8> @testv16i8u(<16 x i8> %in) nounwind {
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: testv16i8u:
-; AVX: # %bb.0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
-; AVX-NEXT: vpshufb %xmm0, %xmm1, %xmm2
-; AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm3
-; AVX-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX-NEXT: vpshufb %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vpaddb %xmm0, %xmm2, %xmm0
-; AVX-NEXT: retq
+; AVX1OR2-LABEL: testv16i8u:
+; AVX1OR2: # %bb.0:
+; AVX1OR2-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
+; AVX1OR2-NEXT: vpshufb %xmm0, %xmm1, %xmm2
+; AVX1OR2-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX1OR2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1OR2-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1OR2-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm3
+; AVX1OR2-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX1OR2-NEXT: vpshufb %xmm0, %xmm1, %xmm0
+; AVX1OR2-NEXT: vpaddb %xmm0, %xmm2, %xmm0
+; AVX1OR2-NEXT: retq
+;
+; AVX512VL-LABEL: testv16i8u:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
+; AVX512VL-NEXT: vpshufb %xmm0, %xmm1, %xmm2
+; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX512VL-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm3
+; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX512VL-NEXT: vpshufb %xmm0, %xmm1, %xmm0
+; AVX512VL-NEXT: vpaddb %xmm0, %xmm2, %xmm0
+; AVX512VL-NEXT: retq
;
; AVX512VLBWDQ-LABEL: testv16i8u:
; AVX512VLBWDQ: # %bb.0:
; AVX512VLBWDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VLBWDQ-NEXT: vpshufb %xmm0, %xmm1, %xmm2
; AVX512VLBWDQ-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX512VLBWDQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VLBWDQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VLBWDQ-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VLBWDQ-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm3
; AVX512VLBWDQ-NEXT: vpand %xmm3, %xmm2, %xmm2
@@ -1841,3 +2005,5 @@ declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1)
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)
declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1)
declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX: {{.*}}
diff --git a/llvm/test/CodeGen/X86/vector-lzcnt-256.ll b/llvm/test/CodeGen/X86/vector-lzcnt-256.ll
index f9cac4ba97149..64f8ed9c20436 100644
--- a/llvm/test/CodeGen/X86/vector-lzcnt-256.ll
+++ b/llvm/test/CodeGen/X86/vector-lzcnt-256.ll
@@ -96,7 +96,7 @@ define <4 x i64> @testv4i64(<4 x i64> %in) nounwind {
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT: vpshufb %ymm0, %ymm1, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %ymm3
; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2
@@ -124,7 +124,7 @@ define <4 x i64> @testv4i64(<4 x i64> %in) nounwind {
; AVX512VLBWDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VLBWDQ-NEXT: vpshufb %ymm0, %ymm1, %ymm2
; AVX512VLBWDQ-NEXT: vpsrlw $4, %ymm0, %ymm3
-; AVX512VLBWDQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
+; AVX512VLBWDQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %ymm3
; AVX512VLBWDQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
; AVX512VLBWDQ-NEXT: vpand %ymm5, %ymm2, %ymm2
@@ -278,7 +278,7 @@ define <4 x i64> @testv4i64u(<4 x i64> %in) nounwind {
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT: vpshufb %ymm0, %ymm1, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %ymm3
; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2
@@ -306,7 +306,7 @@ define <4 x i64> @testv4i64u(<4 x i64> %in) nounwind {
; AVX512VLBWDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VLBWDQ-NEXT: vpshufb %ymm0, %ymm1, %ymm2
; AVX512VLBWDQ-NEXT: vpsrlw $4, %ymm0, %ymm3
-; AVX512VLBWDQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
+; AVX512VLBWDQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %ymm3
; AVX512VLBWDQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
; AVX512VLBWDQ-NEXT: vpand %ymm5, %ymm2, %ymm2
@@ -445,7 +445,7 @@ define <8 x i32> @testv8i32(<8 x i32> %in) nounwind {
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT: vpshufb %ymm0, %ymm1, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %ymm3
; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2
@@ -468,7 +468,7 @@ define <8 x i32> @testv8i32(<8 x i32> %in) nounwind {
; AVX512VLBWDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VLBWDQ-NEXT: vpshufb %ymm0, %ymm1, %ymm2
; AVX512VLBWDQ-NEXT: vpsrlw $4, %ymm0, %ymm3
-; AVX512VLBWDQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
+; AVX512VLBWDQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %ymm3
; AVX512VLBWDQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
; AVX512VLBWDQ-NEXT: vpand %ymm5, %ymm2, %ymm2
@@ -597,7 +597,7 @@ define <8 x i32> @testv8i32u(<8 x i32> %in) nounwind {
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT: vpshufb %ymm0, %ymm1, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %ymm3
; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2
@@ -620,7 +620,7 @@ define <8 x i32> @testv8i32u(<8 x i32> %in) nounwind {
; AVX512VLBWDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VLBWDQ-NEXT: vpshufb %ymm0, %ymm1, %ymm2
; AVX512VLBWDQ-NEXT: vpsrlw $4, %ymm0, %ymm3
-; AVX512VLBWDQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
+; AVX512VLBWDQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %ymm3
; AVX512VLBWDQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
; AVX512VLBWDQ-NEXT: vpand %ymm5, %ymm2, %ymm2
@@ -734,7 +734,7 @@ define <16 x i16> @testv16i16(<16 x i16> %in) nounwind {
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT: vpshufb %ymm0, %ymm1, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %ymm3
; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2
@@ -752,7 +752,7 @@ define <16 x i16> @testv16i16(<16 x i16> %in) nounwind {
; AVX512VLBWDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VLBWDQ-NEXT: vpshufb %ymm0, %ymm1, %ymm2
; AVX512VLBWDQ-NEXT: vpsrlw $4, %ymm0, %ymm3
-; AVX512VLBWDQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
+; AVX512VLBWDQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %ymm3
; AVX512VLBWDQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
; AVX512VLBWDQ-NEXT: vpand %ymm5, %ymm2, %ymm2
@@ -851,7 +851,7 @@ define <16 x i16> @testv16i16u(<16 x i16> %in) nounwind {
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT: vpshufb %ymm0, %ymm1, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %ymm3
; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2
@@ -869,7 +869,7 @@ define <16 x i16> @testv16i16u(<16 x i16> %in) nounwind {
; AVX512VLBWDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VLBWDQ-NEXT: vpshufb %ymm0, %ymm1, %ymm2
; AVX512VLBWDQ-NEXT: vpsrlw $4, %ymm0, %ymm3
-; AVX512VLBWDQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
+; AVX512VLBWDQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %ymm3
; AVX512VLBWDQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
; AVX512VLBWDQ-NEXT: vpand %ymm5, %ymm2, %ymm2
@@ -953,7 +953,7 @@ define <32 x i8> @testv32i8(<32 x i8> %in) nounwind {
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT: vpshufb %ymm0, %ymm1, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm3
; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
@@ -966,7 +966,7 @@ define <32 x i8> @testv32i8(<32 x i8> %in) nounwind {
; AVX512VLBWDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VLBWDQ-NEXT: vpshufb %ymm0, %ymm1, %ymm2
; AVX512VLBWDQ-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512VLBWDQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512VLBWDQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm3
; AVX512VLBWDQ-NEXT: vpand %ymm3, %ymm2, %ymm2
@@ -1045,7 +1045,7 @@ define <32 x i8> @testv32i8u(<32 x i8> %in) nounwind {
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT: vpshufb %ymm0, %ymm1, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm3
; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
@@ -1058,7 +1058,7 @@ define <32 x i8> @testv32i8u(<32 x i8> %in) nounwind {
; AVX512VLBWDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512VLBWDQ-NEXT: vpshufb %ymm0, %ymm1, %ymm2
; AVX512VLBWDQ-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512VLBWDQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512VLBWDQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; AVX512VLBWDQ-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm3
; AVX512VLBWDQ-NEXT: vpand %ymm3, %ymm2, %ymm2
diff --git a/llvm/test/CodeGen/X86/vector-lzcnt-512.ll b/llvm/test/CodeGen/X86/vector-lzcnt-512.ll
index 78be6dfbff06e..c015185fe4511 100644
--- a/llvm/test/CodeGen/X86/vector-lzcnt-512.ll
+++ b/llvm/test/CodeGen/X86/vector-lzcnt-512.ll
@@ -360,7 +360,7 @@ define <32 x i16> @testv32i16(<32 x i16> %in) nounwind {
; AVX512BW-LABEL: testv32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512BW-NEXT: vpshufb %zmm1, %zmm2, %zmm3
; AVX512BW-NEXT: vpshufb %zmm0, %zmm2, %zmm2
@@ -444,7 +444,7 @@ define <32 x i16> @testv32i16u(<32 x i16> %in) nounwind {
; AVX512BW-LABEL: testv32i16u:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512BW-NEXT: vpshufb %zmm1, %zmm2, %zmm3
; AVX512BW-NEXT: vpshufb %zmm0, %zmm2, %zmm2
@@ -548,7 +548,7 @@ define <64 x i8> @testv64i8(<64 x i8> %in) nounwind {
; AVX512BW-LABEL: testv64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512BW-NEXT: vpshufb %zmm1, %zmm2, %zmm3
; AVX512BW-NEXT: vpshufb %zmm0, %zmm2, %zmm0
@@ -636,7 +636,7 @@ define <64 x i8> @testv64i8u(<64 x i8> %in) nounwind {
; AVX512BW-LABEL: testv64i8u:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512BW-NEXT: vpshufb %zmm1, %zmm2, %zmm3
; AVX512BW-NEXT: vpshufb %zmm0, %zmm2, %zmm0
diff --git a/llvm/test/CodeGen/X86/vector-mul.ll b/llvm/test/CodeGen/X86/vector-mul.ll
index 55b58995b87b6..34ab53e552b74 100644
--- a/llvm/test/CodeGen/X86/vector-mul.ll
+++ b/llvm/test/CodeGen/X86/vector-mul.ll
@@ -81,7 +81,7 @@ define <16 x i8> @mul_v16i8_32(<16 x i8> %a0) nounwind {
; X64-AVX512DQ-LABEL: mul_v16i8_32:
; X64-AVX512DQ: # %bb.0:
; X64-AVX512DQ-NEXT: vpsllw $5, %xmm0, %xmm0
-; X64-AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX512DQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512DQ-NEXT: retq
%1 = mul <16 x i8> %a0, <i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32>
ret <16 x i8> %1
@@ -417,7 +417,7 @@ define <16 x i8> @mul_v16i8_17(<16 x i8> %a0) nounwind {
; X64-AVX512DQ-LABEL: mul_v16i8_17:
; X64-AVX512DQ: # %bb.0:
; X64-AVX512DQ-NEXT: vpsllw $4, %xmm0, %xmm1
-; X64-AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; X64-AVX512DQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; X64-AVX512DQ-NEXT: vpaddb %xmm0, %xmm1, %xmm0
; X64-AVX512DQ-NEXT: retq
%1 = mul <16 x i8> %a0, <i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17>
@@ -585,7 +585,7 @@ define <32 x i8> @mul_v32i8_17(<32 x i8> %a0) nounwind {
; X64-AVX512DQ-LABEL: mul_v32i8_17:
; X64-AVX512DQ: # %bb.0:
; X64-AVX512DQ-NEXT: vpsllw $4, %ymm0, %ymm1
-; X64-AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; X64-AVX512DQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
; X64-AVX512DQ-NEXT: vpaddb %ymm0, %ymm1, %ymm0
; X64-AVX512DQ-NEXT: retq
%1 = mul <32 x i8> %a0, <i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17>
@@ -725,7 +725,7 @@ define <16 x i8> @mul_v16i8_neg5(<16 x i8> %a0) nounwind {
; X64-AVX512DQ-LABEL: mul_v16i8_neg5:
; X64-AVX512DQ: # %bb.0:
; X64-AVX512DQ-NEXT: vpsllw $2, %xmm0, %xmm1
-; X64-AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; X64-AVX512DQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; X64-AVX512DQ-NEXT: vpaddb %xmm0, %xmm1, %xmm0
; X64-AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-AVX512DQ-NEXT: vpsubb %xmm0, %xmm1, %xmm0
@@ -933,7 +933,7 @@ define <32 x i8> @mul_v32i8_neg5(<32 x i8> %a0) nounwind {
; X64-AVX512DQ-LABEL: mul_v32i8_neg5:
; X64-AVX512DQ: # %bb.0:
; X64-AVX512DQ-NEXT: vpsllw $2, %ymm0, %ymm1
-; X64-AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; X64-AVX512DQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
; X64-AVX512DQ-NEXT: vpaddb %ymm0, %ymm1, %ymm0
; X64-AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-AVX512DQ-NEXT: vpsubb %ymm0, %ymm1, %ymm0
@@ -1261,7 +1261,7 @@ define <16 x i8> @mul_v16i8_31(<16 x i8> %a0) nounwind {
; X64-AVX512DQ-LABEL: mul_v16i8_31:
; X64-AVX512DQ: # %bb.0:
; X64-AVX512DQ-NEXT: vpsllw $5, %xmm0, %xmm1
-; X64-AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; X64-AVX512DQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; X64-AVX512DQ-NEXT: vpsubb %xmm0, %xmm1, %xmm0
; X64-AVX512DQ-NEXT: retq
%1 = mul <16 x i8> %a0, <i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31>
@@ -1385,7 +1385,7 @@ define <16 x i8> @mul_v16i8_neg15(<16 x i8> %a0) nounwind {
; X64-AVX512DQ-LABEL: mul_v16i8_neg15:
; X64-AVX512DQ: # %bb.0:
; X64-AVX512DQ-NEXT: vpsllw $4, %xmm0, %xmm1
-; X64-AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; X64-AVX512DQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; X64-AVX512DQ-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; X64-AVX512DQ-NEXT: retq
%1 = mul <16 x i8> %a0, <i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15>
diff --git a/llvm/test/CodeGen/X86/vector-pack-128.ll b/llvm/test/CodeGen/X86/vector-pack-128.ll
index b88bb4641b4db..a462b8b0c955d 100644
--- a/llvm/test/CodeGen/X86/vector-pack-128.ll
+++ b/llvm/test/CodeGen/X86/vector-pack-128.ll
@@ -95,12 +95,26 @@ define <16 x i8> @trunc_concat_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) nounwi
; SSE-NEXT: packsswb %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: trunc_concat_packsswb_128:
-; AVX: # %bb.0:
-; AVX-NEXT: vpsraw $15, %xmm0, %xmm0
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: trunc_concat_packsswb_128:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc_concat_packsswb_128:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpsraw $15, %xmm0, %xmm0
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: trunc_concat_packsswb_128:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsraw $15, %xmm0, %xmm0
+; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
%1 = ashr <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%2 = and <8 x i16> %a1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%3 = shufflevector <8 x i16> %1, <8 x i16> %2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -116,12 +130,26 @@ define <16 x i8> @trunc_concat_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) nounwi
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: trunc_concat_packuswb_128:
-; AVX: # %bb.0:
-; AVX-NEXT: vpsrlw $15, %xmm0, %xmm0
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: trunc_concat_packuswb_128:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc_concat_packuswb_128:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpsrlw $15, %xmm0, %xmm0
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: trunc_concat_packuswb_128:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsrlw $15, %xmm0, %xmm0
+; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
%1 = lshr <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%2 = and <8 x i16> %a1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%3 = shufflevector <8 x i16> %1, <8 x i16> %2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -231,12 +259,26 @@ define <16 x i8> @concat_trunc_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) nounwi
; SSE-NEXT: packsswb %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: concat_trunc_packsswb_128:
-; AVX: # %bb.0:
-; AVX-NEXT: vpsraw $15, %xmm0, %xmm0
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: concat_trunc_packsswb_128:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: concat_trunc_packsswb_128:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpsraw $15, %xmm0, %xmm0
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: concat_trunc_packsswb_128:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsraw $15, %xmm0, %xmm0
+; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
%1 = ashr <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%2 = and <8 x i16> %a1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%3 = trunc <8 x i16> %1 to <8 x i8>
@@ -253,12 +295,26 @@ define <16 x i8> @concat_trunc_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) nounwi
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: concat_trunc_packuswb_128:
-; AVX: # %bb.0:
-; AVX-NEXT: vpsrlw $15, %xmm0, %xmm0
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: concat_trunc_packuswb_128:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: concat_trunc_packuswb_128:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpsrlw $15, %xmm0, %xmm0
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: concat_trunc_packuswb_128:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsrlw $15, %xmm0, %xmm0
+; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
%1 = lshr <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%2 = and <8 x i16> %a1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%3 = trunc <8 x i16> %1 to <8 x i8>
@@ -266,3 +322,5 @@ define <16 x i8> @concat_trunc_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) nounwi
%5 = shufflevector <8 x i8> %3, <8 x i8> %4, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i8> %5
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX: {{.*}}
diff --git a/llvm/test/CodeGen/X86/vector-pack-256.ll b/llvm/test/CodeGen/X86/vector-pack-256.ll
index 018b77823c486..01269b48efc09 100644
--- a/llvm/test/CodeGen/X86/vector-pack-256.ll
+++ b/llvm/test/CodeGen/X86/vector-pack-256.ll
@@ -103,7 +103,7 @@ define <32 x i8> @trunc_concat_packsswb_256(<16 x i16> %a0, <16 x i16> %a1) noun
; AVX512F-LABEL: trunc_concat_packsswb_256:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsraw $15, %ymm0, %ymm0
-; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
; AVX512F-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
@@ -116,7 +116,7 @@ define <32 x i8> @trunc_concat_packsswb_256(<16 x i16> %a0, <16 x i16> %a1) noun
; AVX512BW-LABEL: trunc_concat_packsswb_256:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsraw $15, %ymm0, %ymm0
-; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
; AVX512BW-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
@@ -152,7 +152,7 @@ define <32 x i8> @trunc_concat_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) noun
; AVX512F-LABEL: trunc_concat_packuswb_256:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsrlw $15, %ymm0, %ymm0
-; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
; AVX512F-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
@@ -165,7 +165,7 @@ define <32 x i8> @trunc_concat_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) noun
; AVX512BW-LABEL: trunc_concat_packuswb_256:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlw $15, %ymm0, %ymm0
-; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
; AVX512BW-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
@@ -254,7 +254,7 @@ define <16 x i16> @concat_trunc_packusdw_256(<8 x i32> %a0, <8 x i32> %a1) nounw
; AVX512-NEXT: vpsrld $17, %ymm0, %ymm0
; AVX512-NEXT: vpmovdw %ymm0, %xmm0
; AVX512-NEXT: vpmovdw %ymm1, %xmm1
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512-NEXT: vpunpckhqdq {{.*#+}} xmm2 = xmm0[1],xmm1[1]
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
@@ -304,7 +304,7 @@ define <32 x i8> @concat_trunc_packsswb_256(<16 x i16> %a0, <16 x i16> %a1) noun
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
-; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512F-NEXT: vpunpckhqdq {{.*#+}} xmm2 = xmm0[1],xmm1[1]
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
@@ -315,7 +315,7 @@ define <32 x i8> @concat_trunc_packsswb_256(<16 x i16> %a0, <16 x i16> %a1) noun
; AVX512BW-NEXT: vpsraw $15, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BW-NEXT: vpmovwb %ymm1, %xmm1
-; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512BW-NEXT: vpunpckhqdq {{.*#+}} xmm2 = xmm0[1],xmm1[1]
; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
@@ -365,7 +365,7 @@ define <32 x i8> @concat_trunc_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) noun
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
-; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512F-NEXT: vpunpckhqdq {{.*#+}} xmm2 = xmm0[1],xmm1[1]
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
@@ -376,7 +376,7 @@ define <32 x i8> @concat_trunc_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) noun
; AVX512BW-NEXT: vpsrlw $15, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BW-NEXT: vpmovwb %ymm1, %xmm1
-; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512BW-NEXT: vpunpckhqdq {{.*#+}} xmm2 = xmm0[1],xmm1[1]
; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
diff --git a/llvm/test/CodeGen/X86/vector-pack-512.ll b/llvm/test/CodeGen/X86/vector-pack-512.ll
index 31ef3c6d8fb8b..26be8013adbbd 100644
--- a/llvm/test/CodeGen/X86/vector-pack-512.ll
+++ b/llvm/test/CodeGen/X86/vector-pack-512.ll
@@ -51,7 +51,7 @@ define <64 x i8> @trunc_concat_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) noun
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vpsraw $15, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
-; AVX512F-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,8,9,2,3,10,11]
; AVX512F-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm3 = [4,5,12,13,6,7,14,15]
@@ -74,7 +74,7 @@ define <64 x i8> @trunc_concat_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) noun
; AVX512BW-LABEL: trunc_concat_packsswb_512:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsraw $15, %zmm0, %zmm0
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [4,5,12,13,6,7,14,15]
; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,9,2,3,10,11]
@@ -97,7 +97,7 @@ define <64 x i8> @trunc_concat_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) noun
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vpsrlw $15, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
-; AVX512F-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,8,9,2,3,10,11]
; AVX512F-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm3 = [4,5,12,13,6,7,14,15]
@@ -120,7 +120,7 @@ define <64 x i8> @trunc_concat_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) noun
; AVX512BW-LABEL: trunc_concat_packuswb_512:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlw $15, %zmm0, %zmm0
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [4,5,12,13,6,7,14,15]
; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,9,2,3,10,11]
@@ -195,7 +195,7 @@ define <64 x i8> @concat_trunc_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) noun
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm3, %ymm1
-; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,8,1,9,6,14,7,15]
@@ -207,7 +207,7 @@ define <64 x i8> @concat_trunc_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) noun
; AVX512BW-NEXT: vpsraw $15, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1
-; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm2
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,8,1,9,6,14,7,15]
@@ -238,7 +238,7 @@ define <64 x i8> @concat_trunc_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) noun
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm3, %ymm1
-; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,8,1,9,6,14,7,15]
@@ -250,7 +250,7 @@ define <64 x i8> @concat_trunc_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) noun
; AVX512BW-NEXT: vpsrlw $15, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1
-; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm2
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,8,1,9,6,14,7,15]
diff --git a/llvm/test/CodeGen/X86/vector-pcmp.ll b/llvm/test/CodeGen/X86/vector-pcmp.ll
index 03d525ef0aa30..a21b959c229d6 100644
--- a/llvm/test/CodeGen/X86/vector-pcmp.ll
+++ b/llvm/test/CodeGen/X86/vector-pcmp.ll
@@ -255,11 +255,23 @@ define <16 x i8> @cmpeq_zext_v16i8(<16 x i8> %a, <16 x i8> %b) {
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: cmpeq_zext_v16i8:
-; AVX: # %bb.0:
-; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: cmpeq_zext_v16i8:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: cmpeq_zext_v16i8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: cmpeq_zext_v16i8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-NEXT: retq
%cmp = icmp eq <16 x i8> %a, %b
%zext = zext <16 x i1> %cmp to <16 x i8>
ret <16 x i8> %zext
@@ -394,7 +406,7 @@ define <32 x i8> @cmpgt_zext_v32i8(<32 x i8> %a, <32 x i8> %b) {
; AVX512-LABEL: cmpgt_zext_v32i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; AVX512-NEXT: retq
%cmp = icmp sgt <32 x i8> %a, %b
%zext = zext <32 x i1> %cmp to <32 x i8>
diff --git a/llvm/test/CodeGen/X86/vector-reduce-add-mask.ll b/llvm/test/CodeGen/X86/vector-reduce-add-mask.ll
index ceb8af381eb3a..1f903143ec9d1 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-add-mask.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-add-mask.ll
@@ -28,13 +28,37 @@ define i64 @test_v2i64_v2i32(<2 x i64> %a0) {
; SSE41-NEXT: movq %xmm1, %rax
; SSE41-NEXT: retq
;
-; AVX-LABEL: test_v2i64_v2i32:
-; AVX: # %bb.0:
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX-NEXT: vpaddq %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmovq %xmm0, %rax
-; AVX-NEXT: retq
+; AVX1-LABEL: test_v2i64_v2i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test_v2i64_v2i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vmovq %xmm0, %rax
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test_v2i64_v2i32:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; AVX512BW-NEXT: vpaddq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vmovq %xmm0, %rax
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: test_v2i64_v2i32:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; AVX512BWVL-NEXT: vpaddq %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: vmovq %xmm0, %rax
+; AVX512BWVL-NEXT: retq
%1 = and <2 x i64> %a0, <i64 255, i64 255>
%2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %1)
ret i64 %2
@@ -271,19 +295,33 @@ define i64 @test_v16i64_v16i8(<16 x i64> %a0) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: test_v16i64_v16i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovqb %zmm1, %xmm1
-; AVX512-NEXT: vpmovqb %zmm0, %xmm0
-; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v16i64_v16i8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpmovqb %zmm1, %xmm1
+; AVX512BW-NEXT: vpmovqb %zmm0, %xmm0
+; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512BW-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; AVX512BW-NEXT: vpaddq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vmovq %xmm0, %rax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: test_v16i64_v16i8:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpmovqb %zmm1, %xmm1
+; AVX512BWVL-NEXT: vpmovqb %zmm0, %xmm0
+; AVX512BWVL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; AVX512BWVL-NEXT: vpaddq %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: vmovq %xmm0, %rax
+; AVX512BWVL-NEXT: vzeroupper
+; AVX512BWVL-NEXT: retq
%1 = and <16 x i64> %a0, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
%2 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %1)
ret i64 %2
@@ -1015,7 +1053,7 @@ define i16 @test_v16i16_v16i8(<16 x i16> %a0) {
; AVX512BWVL-LABEL: test_v16i16_v16i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
-; AVX512BWVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512BWVL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BWVL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
@@ -1241,7 +1279,7 @@ define i16 @test_v64i16_v64i8(<64 x i16> %a0) {
; AVX512-NEXT: vpmovwb %zmm0, %ymm0
; AVX512-NEXT: vpmovwb %zmm1, %ymm1
; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpsadbw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
index 249dc10f52cf0..feb194e7e3cf7 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
@@ -436,7 +436,7 @@ define i1 @trunc_v32i16_v32i1(<32 x i16>) {
;
; AVX512-LABEL: trunc_v32i16_v32i1:
; AVX512: # %bb.0:
-; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: setne %al
; AVX512-NEXT: vzeroupper
@@ -485,7 +485,7 @@ define i1 @trunc_v64i8_v64i1(<64 x i8>) {
;
; AVX512-LABEL: trunc_v64i8_v64i1:
; AVX512: # %bb.0:
-; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: setne %al
; AVX512-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
index 43df251f6d331..dfe163a2aa08c 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
@@ -987,7 +987,7 @@ define i1 @mask_v128i8(<128 x i8> %a0) {
; AVX512-LABEL: mask_v128i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
+; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-reduce-smax.ll b/llvm/test/CodeGen/X86/vector-reduce-smax.ll
index 03b74b6566b81..e8dec588224f2 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-smax.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-smax.ll
@@ -1208,14 +1208,23 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
;
-; AVX512-LABEL: test_v8i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF
-; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v8i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
+; AVX512BW-NEXT: vmovd %xmm0, %eax
+; AVX512BW-NEXT: xorl $32767, %eax # imm = 0x7FFF
+; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_v8i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
+; AVX512VL-NEXT: vmovd %xmm0, %eax
+; AVX512VL-NEXT: xorl $32767, %eax # imm = 0x7FFF
+; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512VL-NEXT: retq
%1 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %a0)
ret i16 %1
}
@@ -1269,17 +1278,29 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: test_v16i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF
-; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v16i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
+; AVX512BW-NEXT: vmovd %xmm0, %eax
+; AVX512BW-NEXT: xorl $32767, %eax # imm = 0x7FFF
+; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_v16i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
+; AVX512VL-NEXT: vmovd %xmm0, %eax
+; AVX512VL-NEXT: xorl $32767, %eax # imm = 0x7FFF
+; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
%1 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> %a0)
ret i16 %1
}
@@ -1341,19 +1362,33 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: test_v32i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF
-; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v32i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
+; AVX512BW-NEXT: vmovd %xmm0, %eax
+; AVX512BW-NEXT: xorl $32767, %eax # imm = 0x7FFF
+; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_v32i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
+; AVX512VL-NEXT: vmovd %xmm0, %eax
+; AVX512VL-NEXT: xorl $32767, %eax # imm = 0x7FFF
+; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
%1 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> %a0)
ret i16 %1
}
@@ -1431,20 +1466,35 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: test_v64i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF
-; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v64i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
+; AVX512BW-NEXT: vmovd %xmm0, %eax
+; AVX512BW-NEXT: xorl $32767, %eax # imm = 0x7FFF
+; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_v64i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
+; AVX512VL-NEXT: vmovd %xmm0, %eax
+; AVX512VL-NEXT: xorl $32767, %eax # imm = 0x7FFF
+; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
%1 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> %a0)
ret i16 %1
}
@@ -1675,16 +1725,27 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
-; AVX512-LABEL: test_v16i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: xorb $127, %al
-; AVX512-NEXT: # kill: def $al killed $al killed $eax
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v16i8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
+; AVX512BW-NEXT: vmovd %xmm0, %eax
+; AVX512BW-NEXT: xorb $127, %al
+; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_v16i8:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
+; AVX512VL-NEXT: vmovd %xmm0, %eax
+; AVX512VL-NEXT: xorb $127, %al
+; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
+; AVX512VL-NEXT: retq
%1 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %a0)
ret i8 %1
}
@@ -1768,19 +1829,33 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: test_v32i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: xorb $127, %al
-; AVX512-NEXT: # kill: def $al killed $al killed $eax
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v32i8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
+; AVX512BW-NEXT: vmovd %xmm0, %eax
+; AVX512BW-NEXT: xorb $127, %al
+; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_v32i8:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
+; AVX512VL-NEXT: vmovd %xmm0, %eax
+; AVX512VL-NEXT: xorb $127, %al
+; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
%1 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> %a0)
ret i8 %1
}
@@ -1880,21 +1955,37 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: test_v64i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: xorb $127, %al
-; AVX512-NEXT: # kill: def $al killed $al killed $eax
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v64i8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
+; AVX512BW-NEXT: vmovd %xmm0, %eax
+; AVX512BW-NEXT: xorb $127, %al
+; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_v64i8:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
+; AVX512VL-NEXT: vmovd %xmm0, %eax
+; AVX512VL-NEXT: xorb $127, %al
+; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
%1 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> %a0)
ret i8 %1
}
@@ -2026,22 +2117,39 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: test_v128i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: xorb $127, %al
-; AVX512-NEXT: # kill: def $al killed $al killed $eax
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v128i8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
+; AVX512BW-NEXT: vmovd %xmm0, %eax
+; AVX512BW-NEXT: xorb $127, %al
+; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_v128i8:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
+; AVX512VL-NEXT: vmovd %xmm0, %eax
+; AVX512VL-NEXT: xorb $127, %al
+; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
%1 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> %a0)
ret i8 %1
}
diff --git a/llvm/test/CodeGen/X86/vector-reduce-smin.ll b/llvm/test/CodeGen/X86/vector-reduce-smin.ll
index cdf9aa614b58a..55e27fb519a44 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-smin.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-smin.ll
@@ -1208,14 +1208,23 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
;
-; AVX512-LABEL: test_v8i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: xorl $32768, %eax # imm = 0x8000
-; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v8i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
+; AVX512BW-NEXT: vmovd %xmm0, %eax
+; AVX512BW-NEXT: xorl $32768, %eax # imm = 0x8000
+; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_v8i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
+; AVX512VL-NEXT: vmovd %xmm0, %eax
+; AVX512VL-NEXT: xorl $32768, %eax # imm = 0x8000
+; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512VL-NEXT: retq
%1 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %a0)
ret i16 %1
}
@@ -1269,17 +1278,29 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: test_v16i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: xorl $32768, %eax # imm = 0x8000
-; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v16i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
+; AVX512BW-NEXT: vmovd %xmm0, %eax
+; AVX512BW-NEXT: xorl $32768, %eax # imm = 0x8000
+; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_v16i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
+; AVX512VL-NEXT: vmovd %xmm0, %eax
+; AVX512VL-NEXT: xorl $32768, %eax # imm = 0x8000
+; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
%1 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> %a0)
ret i16 %1
}
@@ -1341,19 +1362,33 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: test_v32i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: xorl $32768, %eax # imm = 0x8000
-; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v32i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT: vpminsw %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
+; AVX512BW-NEXT: vmovd %xmm0, %eax
+; AVX512BW-NEXT: xorl $32768, %eax # imm = 0x8000
+; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_v32i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT: vpminsw %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
+; AVX512VL-NEXT: vmovd %xmm0, %eax
+; AVX512VL-NEXT: xorl $32768, %eax # imm = 0x8000
+; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
%1 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> %a0)
ret i16 %1
}
@@ -1431,20 +1466,35 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: test_v64i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpminsw %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: xorl $32768, %eax # imm = 0x8000
-; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v64i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpminsw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT: vpminsw %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
+; AVX512BW-NEXT: vmovd %xmm0, %eax
+; AVX512BW-NEXT: xorl $32768, %eax # imm = 0x8000
+; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_v64i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpminsw %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT: vpminsw %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
+; AVX512VL-NEXT: vmovd %xmm0, %eax
+; AVX512VL-NEXT: xorl $32768, %eax # imm = 0x8000
+; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
%1 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> %a0)
ret i16 %1
}
@@ -1675,16 +1725,27 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
-; AVX512-LABEL: test_v16i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: addb $-128, %al
-; AVX512-NEXT: # kill: def $al killed $al killed $eax
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v16i8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
+; AVX512BW-NEXT: vmovd %xmm0, %eax
+; AVX512BW-NEXT: addb $-128, %al
+; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_v16i8:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
+; AVX512VL-NEXT: vmovd %xmm0, %eax
+; AVX512VL-NEXT: addb $-128, %al
+; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
+; AVX512VL-NEXT: retq
%1 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %a0)
ret i8 %1
}
@@ -1768,19 +1829,33 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: test_v32i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: addb $-128, %al
-; AVX512-NEXT: # kill: def $al killed $al killed $eax
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v32i8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
+; AVX512BW-NEXT: vmovd %xmm0, %eax
+; AVX512BW-NEXT: addb $-128, %al
+; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_v32i8:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
+; AVX512VL-NEXT: vmovd %xmm0, %eax
+; AVX512VL-NEXT: addb $-128, %al
+; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
%1 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> %a0)
ret i8 %1
}
@@ -1880,21 +1955,37 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: test_v64i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: addb $-128, %al
-; AVX512-NEXT: # kill: def $al killed $al killed $eax
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v64i8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT: vpminsb %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
+; AVX512BW-NEXT: vmovd %xmm0, %eax
+; AVX512BW-NEXT: addb $-128, %al
+; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_v64i8:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT: vpminsb %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
+; AVX512VL-NEXT: vmovd %xmm0, %eax
+; AVX512VL-NEXT: addb $-128, %al
+; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
%1 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> %a0)
ret i8 %1
}
@@ -2026,22 +2117,39 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: test_v128i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpminsb %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: addb $-128, %al
-; AVX512-NEXT: # kill: def $al killed $al killed $eax
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512BW-LABEL: test_v128i8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpminsb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT: vpminsb %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
+; AVX512BW-NEXT: vmovd %xmm0, %eax
+; AVX512BW-NEXT: addb $-128, %al
+; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_v128i8:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpminsb %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT: vpminsb %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
+; AVX512VL-NEXT: vmovd %xmm0, %eax
+; AVX512VL-NEXT: addb $-128, %al
+; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
%1 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> %a0)
ret i8 %1
}
diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll
index bcda930923a90..c350531e11b8a 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-128.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll
@@ -326,7 +326,7 @@ define <8 x i16> @var_rotate_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
;
; AVX512VL-LABEL: var_rotate_v8i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VL-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; AVX512VL-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm0[4,4,5,5,6,6,7,7]
@@ -353,7 +353,7 @@ define <8 x i16> @var_rotate_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
;
; AVX512VLBW-LABEL: var_rotate_v8i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpsllvw %xmm1, %xmm0, %xmm2
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
@@ -531,7 +531,7 @@ define <16 x i8> @var_rotate_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VL-NEXT: vpslld $8, %zmm0, %zmm2
; AVX512VL-NEXT: vpord %zmm2, %zmm0, %zmm0
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; AVX512VL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: vpsrld $8, %zmm0, %zmm0
@@ -557,7 +557,7 @@ define <16 x i8> @var_rotate_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
;
; AVX512VLBW-LABEL: var_rotate_v16i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
@@ -588,7 +588,7 @@ define <16 x i8> @var_rotate_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
;
; AVX512VLVBMI2-LABEL: var_rotate_v16i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VLVBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512VLVBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
@@ -1720,7 +1720,7 @@ define <8 x i16> @splatconstant_rotate_mask_v8i16(<8 x i16> %a) nounwind {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $5, %xmm0, %xmm1
; AVX512VL-NEXT: vpsrlw $11, %xmm0, %xmm0
-; AVX512VL-NEXT: vpternlogq $168, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; AVX512VL-NEXT: vpternlogd $168, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_rotate_mask_v8i16:
@@ -1735,7 +1735,7 @@ define <8 x i16> @splatconstant_rotate_mask_v8i16(<8 x i16> %a) nounwind {
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsllw $5, %xmm0, %xmm1
; AVX512VLBW-NEXT: vpsrlw $11, %xmm0, %xmm0
-; AVX512VLBW-NEXT: vpternlogq $168, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; AVX512VLBW-NEXT: vpternlogd $168, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_rotate_mask_v8i16:
@@ -1749,7 +1749,7 @@ define <8 x i16> @splatconstant_rotate_mask_v8i16(<8 x i16> %a) nounwind {
; AVX512VLVBMI2-LABEL: splatconstant_rotate_mask_v8i16:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpshldw $5, %xmm0, %xmm0, %xmm0
-; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VLVBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: splatconstant_rotate_mask_v8i16:
@@ -1810,7 +1810,7 @@ define <16 x i8> @splatconstant_rotate_mask_v16i8(<16 x i8> %a) nounwind {
; AVX512VLX-NEXT: vpsllw $4, %xmm0, %xmm1
; AVX512VLX-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX512VLX-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0
-; AVX512VLX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VLX-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VLX-NEXT: retq
;
; XOP-LABEL: splatconstant_rotate_mask_v16i8:
diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll
index fdcf8a0aec101..4dc837be572c9 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-256.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll
@@ -241,7 +241,7 @@ define <16 x i16> @var_rotate_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
;
; AVX512VL-LABEL: var_rotate_v16i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VL-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; AVX512VL-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
@@ -267,7 +267,7 @@ define <16 x i16> @var_rotate_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
;
; AVX512VLBW-LABEL: var_rotate_v16i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpsllvw %ymm1, %ymm0, %ymm2
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT: vpsubw %ymm1, %ymm3, %ymm1
@@ -421,7 +421,7 @@ define <32 x i8> @var_rotate_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm2
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm3
-; AVX512VL-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3
+; AVX512VL-NEXT: vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm3
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: retq
@@ -443,7 +443,7 @@ define <32 x i8> @var_rotate_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
;
; AVX512VLBW-LABEL: var_rotate_v32i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31]
; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
@@ -473,7 +473,7 @@ define <32 x i8> @var_rotate_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
;
; AVX512VLVBMI2-LABEL: var_rotate_v32i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX512VLVBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm1
; AVX512VLVBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31]
; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
@@ -1572,7 +1572,7 @@ define <16 x i16> @splatconstant_rotate_mask_v16i16(<16 x i16> %a) nounwind {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $5, %ymm0, %ymm1
; AVX512VL-NEXT: vpsrlw $11, %ymm0, %ymm0
-; AVX512VL-NEXT: vpternlogq $168, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
+; AVX512VL-NEXT: vpternlogd $168, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_rotate_mask_v16i16:
@@ -1587,7 +1587,7 @@ define <16 x i16> @splatconstant_rotate_mask_v16i16(<16 x i16> %a) nounwind {
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsllw $5, %ymm0, %ymm1
; AVX512VLBW-NEXT: vpsrlw $11, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpternlogq $168, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
+; AVX512VLBW-NEXT: vpternlogd $168, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_rotate_mask_v16i16:
@@ -1600,7 +1600,7 @@ define <16 x i16> @splatconstant_rotate_mask_v16i16(<16 x i16> %a) nounwind {
; AVX512VLVBMI2-LABEL: splatconstant_rotate_mask_v16i16:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpshldw $5, %ymm0, %ymm0, %ymm0
-; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512VLVBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_rotate_mask_v16i16:
@@ -1670,7 +1670,7 @@ define <32 x i8> @splatconstant_rotate_mask_v32i8(<32 x i8> %a) nounwind {
; AVX512VLX-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512VLX-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512VLX-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
-; AVX512VLX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512VLX-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; AVX512VLX-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_rotate_mask_v32i8:
diff --git a/llvm/test/CodeGen/X86/vector-rotate-512.ll b/llvm/test/CodeGen/X86/vector-rotate-512.ll
index 91daaecafe603..8374161d6de0d 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-512.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-512.ll
@@ -95,7 +95,7 @@ define <32 x i16> @var_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
;
; AVX512BW-LABEL: var_rotate_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512BW-NEXT: vpsubw %zmm1, %zmm3, %zmm1
@@ -105,7 +105,7 @@ define <32 x i16> @var_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
;
; AVX512VLBW-LABEL: var_rotate_v32i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2
; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT: vpsubw %zmm1, %zmm3, %zmm1
@@ -214,7 +214,7 @@ define <64 x i8> @var_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
;
; AVX512BW-LABEL: var_rotate_v64i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31],zmm1[40],zmm2[40],zmm1[41],zmm2[41],zmm1[42],zmm2[42],zmm1[43],zmm2[43],zmm1[44],zmm2[44],zmm1[45],zmm2[45],zmm1[46],zmm2[46],zmm1[47],zmm2[47],zmm1[56],zmm2[56],zmm1[57],zmm2[57],zmm1[58],zmm2[58],zmm1[59],zmm2[59],zmm1[60],zmm2[60],zmm1[61],zmm2[61],zmm1[62],zmm2[62],zmm1[63],zmm2[63]
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm4 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
@@ -229,7 +229,7 @@ define <64 x i8> @var_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
;
; AVX512VLBW-LABEL: var_rotate_v64i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31],zmm1[40],zmm2[40],zmm1[41],zmm2[41],zmm1[42],zmm2[42],zmm1[43],zmm2[43],zmm1[44],zmm2[44],zmm1[45],zmm2[45],zmm1[46],zmm2[46],zmm1[47],zmm2[47],zmm1[56],zmm2[56],zmm1[57],zmm2[57],zmm1[58],zmm2[58],zmm1[59],zmm2[59],zmm1[60],zmm2[60],zmm1[61],zmm2[61],zmm1[62],zmm2[62],zmm1[63],zmm2[63]
; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} zmm4 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
@@ -244,7 +244,7 @@ define <64 x i8> @var_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
;
; AVX512VBMI2-LABEL: var_rotate_v64i8:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512VBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512VBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VBMI2-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31],zmm1[40],zmm2[40],zmm1[41],zmm2[41],zmm1[42],zmm2[42],zmm1[43],zmm2[43],zmm1[44],zmm2[44],zmm1[45],zmm2[45],zmm1[46],zmm2[46],zmm1[47],zmm2[47],zmm1[56],zmm2[56],zmm1[57],zmm2[57],zmm1[58],zmm2[58],zmm1[59],zmm2[59],zmm1[60],zmm2[60],zmm1[61],zmm2[61],zmm1[62],zmm2[62],zmm1[63],zmm2[63]
; AVX512VBMI2-NEXT: vpunpckhbw {{.*#+}} zmm4 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
@@ -259,7 +259,7 @@ define <64 x i8> @var_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
;
; AVX512VLVBMI2-LABEL: var_rotate_v64i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512VLVBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX512VLVBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31],zmm1[40],zmm2[40],zmm1[41],zmm2[41],zmm1[42],zmm2[42],zmm1[43],zmm2[43],zmm1[44],zmm2[44],zmm1[45],zmm2[45],zmm1[46],zmm2[46],zmm1[47],zmm2[47],zmm1[56],zmm2[56],zmm1[57],zmm2[57],zmm1[58],zmm2[58],zmm1[59],zmm2[59],zmm1[60],zmm2[60],zmm1[61],zmm2[61],zmm1[62],zmm2[62],zmm1[63],zmm2[63]
; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} zmm4 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
@@ -844,7 +844,7 @@ define <32 x i16> @splatconstant_rotate_mask_v32i16(<32 x i16> %a) nounwind {
; AVX512F-NEXT: vpsrlw $11, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $11, %ymm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512F-NEXT: vpternlogq $168, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
+; AVX512F-NEXT: vpternlogd $168, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_rotate_mask_v32i16:
@@ -856,33 +856,33 @@ define <32 x i16> @splatconstant_rotate_mask_v32i16(<32 x i16> %a) nounwind {
; AVX512VL-NEXT: vpsrlw $11, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $11, %ymm2, %ymm2
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512VL-NEXT: vpternlogq $168, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
+; AVX512VL-NEXT: vpternlogd $168, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_rotate_mask_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $5, %zmm0, %zmm1
; AVX512BW-NEXT: vpsrlw $11, %zmm0, %zmm0
-; AVX512BW-NEXT: vpternlogq $168, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
+; AVX512BW-NEXT: vpternlogd $168, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_rotate_mask_v32i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsllw $5, %zmm0, %zmm1
; AVX512VLBW-NEXT: vpsrlw $11, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpternlogq $168, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
+; AVX512VLBW-NEXT: vpternlogd $168, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_rotate_mask_v32i16:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpshldw $5, %zmm0, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512VBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_rotate_mask_v32i16:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpshldw $5, %zmm0, %zmm0, %zmm0
-; AVX512VLVBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512VLVBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512VLVBMI2-NEXT: retq
%shl = shl <32 x i16> %a, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
%lshr = lshr <32 x i16> %a, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
@@ -903,7 +903,7 @@ define <64 x i8> @splatconstant_rotate_mask_v64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
-; AVX512F-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_rotate_mask_v64i8:
@@ -916,7 +916,7 @@ define <64 x i8> @splatconstant_rotate_mask_v64i8(<64 x i8> %a) nounwind {
; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
-; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_rotate_mask_v64i8:
@@ -924,7 +924,7 @@ define <64 x i8> @splatconstant_rotate_mask_v64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_rotate_mask_v64i8:
@@ -932,7 +932,7 @@ define <64 x i8> @splatconstant_rotate_mask_v64i8(<64 x i8> %a) nounwind {
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
-; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_rotate_mask_v64i8:
@@ -940,7 +940,7 @@ define <64 x i8> @splatconstant_rotate_mask_v64i8(<64 x i8> %a) nounwind {
; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512VBMI2-NEXT: vpsrlw $4, %zmm0, %zmm0
; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
-; AVX512VBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512VBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_rotate_mask_v64i8:
@@ -948,7 +948,7 @@ define <64 x i8> @splatconstant_rotate_mask_v64i8(<64 x i8> %a) nounwind {
; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm0, %zmm0
; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
-; AVX512VLVBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512VLVBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512VLVBMI2-NEXT: retq
%shl = shl <64 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
%lshr = lshr <64 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
index ed1910d341a08..df7a66a309ed7 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
@@ -1177,7 +1177,7 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
;
; AVX512DQVL-LABEL: splatvar_modulo_shift_v16i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQVL-NEXT: vpsrad %xmm1, %zmm0, %zmm0
@@ -1187,7 +1187,7 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
;
; AVX512BWVL-LABEL: splatvar_modulo_shift_v16i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovsxbw %xmm0, %ymm0
; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BWVL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
@@ -1735,7 +1735,7 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) nounwind {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlw $3, %xmm0, %xmm0
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512VL-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; AVX512VL-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0
; AVX512VL-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
index 0a264a4d71299..f29d60e6edc59 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -1323,7 +1323,7 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
; AVX512BWVL-LABEL: splatvar_modulo_shift_v32i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpmovsxbw %ymm0, %zmm0
-; AVX512BWVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BWVL-NEXT: vpsraw %xmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
@@ -1995,7 +1995,7 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlw $3, %ymm0, %ymm0
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512VL-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
+; AVX512VL-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm0
; AVX512VL-NEXT: vpsubb %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
index b1e6c739fac2d..b6ad5306f5d1e 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
@@ -465,7 +465,7 @@ define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlw $3, %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512BW-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
+; AVX512BW-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
; AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
index ec5cf43a357a9..d501512201cd1 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
@@ -2335,7 +2335,7 @@ define <8 x i8> @splatconstant_shift_v8i8(<8 x i8> %a) nounwind {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlw $3, %xmm0, %xmm0
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512VL-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; AVX512VL-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0
; AVX512VL-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
@@ -2388,7 +2388,7 @@ define <4 x i8> @splatconstant_shift_v4i8(<4 x i8> %a) nounwind {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlw $3, %xmm0, %xmm0
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512VL-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; AVX512VL-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0
; AVX512VL-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
@@ -2441,7 +2441,7 @@ define <2 x i8> @splatconstant_shift_v2i8(<2 x i8> %a) nounwind {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlw $3, %xmm0, %xmm0
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512VL-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; AVX512VL-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0
; AVX512VL-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
index e3c2c7dcda138..e248aafab5255 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
@@ -980,7 +980,7 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
;
; AVX512DQVL-LABEL: splatvar_modulo_shift_v16i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQVL-NEXT: vpsrld %xmm1, %zmm0, %zmm0
@@ -990,7 +990,7 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
;
; AVX512BWVL-LABEL: splatvar_modulo_shift_v16i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BWVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
@@ -1463,7 +1463,7 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) nounwind {
; AVX512VL-LABEL: splatconstant_shift_v16i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlw $3, %xmm0, %xmm0
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatconstant_shift_v16i8:
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
index c8716393fd913..eed4637beceea 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
@@ -446,14 +446,14 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512DQVL-NEXT: vpsrlw $4, %ymm0, %ymm2
-; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
+; AVX512DQVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm2
; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpsrlw $2, %ymm0, %ymm2
-; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
+; AVX512DQVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm2
; AVX512DQVL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpsrlw $1, %ymm0, %ymm2
-; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
+; AVX512DQVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm2
; AVX512DQVL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQVL-NEXT: retq
@@ -1093,7 +1093,7 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
; AVX512BWVL-LABEL: splatvar_modulo_shift_v32i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
-; AVX512BWVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BWVL-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
@@ -1680,7 +1680,7 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
; AVX512VL-LABEL: splatconstant_shift_v32i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlw $3, %ymm0, %ymm0
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatconstant_shift_v32i8:
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
index fa4575dd54e86..f02849d61454a 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
@@ -86,17 +86,17 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512BW-LABEL: var_shift_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm2
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2
; AVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT: vpsrlw $2, %zmm0, %zmm2
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2
; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm2
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2
; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
@@ -393,13 +393,13 @@ define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT: vpsrlw $3, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512DQ-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512DQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_shift_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlw $3, %zmm0, %zmm0
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <64 x i8> %shift
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
index 8d3cb45d396d4..9cab44b069fd4 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
@@ -2034,7 +2034,7 @@ define <8 x i8> @splatconstant_shift_v8i8(<8 x i8> %a) nounwind {
; AVX512VL-LABEL: splatconstant_shift_v8i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlw $3, %xmm0, %xmm0
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatconstant_shift_v8i8:
@@ -2073,7 +2073,7 @@ define <4 x i8> @splatconstant_shift_v4i8(<4 x i8> %a) nounwind {
; AVX512VL-LABEL: splatconstant_shift_v4i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlw $3, %xmm0, %xmm0
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatconstant_shift_v4i8:
@@ -2112,7 +2112,7 @@ define <2 x i8> @splatconstant_shift_v2i8(<2 x i8> %a) nounwind {
; AVX512VL-LABEL: splatconstant_shift_v2i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlw $3, %xmm0, %xmm0
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatconstant_shift_v2i8:
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
index 80c6f77383f8f..617a2d9c97375 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
@@ -884,7 +884,7 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
;
; AVX512DQVL-LABEL: splatvar_modulo_shift_v16i8:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQVL-NEXT: vpslld %xmm1, %zmm0, %zmm0
@@ -894,7 +894,7 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
;
; AVX512BWVL-LABEL: splatvar_modulo_shift_v16i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BWVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
@@ -1327,7 +1327,7 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) nounwind {
; AVX512VL-LABEL: splatconstant_shift_v16i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $3, %xmm0, %xmm0
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatconstant_shift_v16i8:
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
index e35468e887a53..07902b4a86dee 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
@@ -381,10 +381,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512DQVL-NEXT: vpsllw $4, %ymm0, %ymm2
-; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
+; AVX512DQVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm2
; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpsllw $2, %ymm0, %ymm2
-; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
+; AVX512DQVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm2
; AVX512DQVL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpaddb %ymm0, %ymm0, %ymm2
@@ -1013,7 +1013,7 @@ define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwi
; AVX512BWVL-LABEL: splatvar_modulo_shift_v32i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
-; AVX512BWVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BWVL-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
@@ -1568,7 +1568,7 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
; AVX512VL-LABEL: splatconstant_shift_v32i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $3, %ymm0, %ymm0
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatconstant_shift_v32i8:
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-512.ll b/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
index 04b04ed3f1d2f..92b60490f976d 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
@@ -83,12 +83,12 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512BW-LABEL: var_shift_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2
; AVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT: vpsllw $2, %zmm0, %zmm2
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2
; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
@@ -380,13 +380,13 @@ define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT: vpsllw $3, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512DQ-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512DQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_shift_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $3, %zmm0, %zmm0
-; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <64 x i8> %shift
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll
index e80a72d7e6631..bd7b250b3d8c5 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll
@@ -1825,7 +1825,7 @@ define <8 x i8> @splatconstant_shift_v8i8(<8 x i8> %a) nounwind {
; AVX512VL-LABEL: splatconstant_shift_v8i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $3, %xmm0, %xmm0
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatconstant_shift_v8i8:
@@ -1864,7 +1864,7 @@ define <4 x i8> @splatconstant_shift_v4i8(<4 x i8> %a) nounwind {
; AVX512VL-LABEL: splatconstant_shift_v4i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $3, %xmm0, %xmm0
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatconstant_shift_v4i8:
@@ -1903,7 +1903,7 @@ define <2 x i8> @splatconstant_shift_v2i8(<2 x i8> %a) nounwind {
; AVX512VL-LABEL: splatconstant_shift_v2i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $3, %xmm0, %xmm0
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatconstant_shift_v2i8:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
index 994ff3b215a3a..f0cdfc9beb0fc 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
@@ -160,7 +160,7 @@ define <16 x float> @shuffle_v16f32_03_uu_uu_uu_uu_04_uu_uu_uu_uu_11_uu_uu_uu_uu
define <16 x float> @shuffle_v16f32_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31(<16 x float> %a) {
; ALL-LABEL: shuffle_v16f32_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
; ALL: # %bb.0:
-; ALL-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; ALL-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; ALL-NEXT: retq
%tmp1 = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> <i32 undef, i32 17, i32 undef, i32 19, i32 undef, i32 5, i32 undef, i32 7, i32 undef, i32 9, i32 undef, i32 11, i32 undef, i32 13, i32 undef, i32 15>
%tmp2 = shufflevector <16 x float> %tmp1, <16 x float> <float 0.000000e+00, float undef, float 0.000000e+00, float undef, float 0.000000e+00, float undef, float 0.000000e+00, float undef, float 0.000000e+00, float undef, float 0.000000e+00, float undef, float 0.000000e+00, float undef, float 0.000000e+00, float undef>, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
diff --git a/llvm/test/CodeGen/X86/vselect-pcmp.ll b/llvm/test/CodeGen/X86/vselect-pcmp.ll
index d89c4abab40a3..a0573a449646d 100644
--- a/llvm/test/CodeGen/X86/vselect-pcmp.ll
+++ b/llvm/test/CodeGen/X86/vselect-pcmp.ll
@@ -612,7 +612,7 @@ define <16 x i16> @blend_splat1_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <
;
; AVX512VL-LABEL: blend_splat1_mask_cond_v16i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0
@@ -651,7 +651,7 @@ define <16 x i8> @blend_splat1_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x
;
; AVX512VL-LABEL: blend_splat1_mask_cond_v16i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $202, %xmm2, %xmm1, %xmm0
@@ -756,7 +756,7 @@ define <8 x i16> @blend_splatmax_mask_cond_v8i16(<8 x i16> %x, <8 x i16> %y, <8
;
; AVX512VL-LABEL: blend_splatmax_mask_cond_v8i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $202, %xmm2, %xmm1, %xmm0
@@ -803,7 +803,7 @@ define <32 x i8> @blend_splatmax_mask_cond_v32i8(<32 x i8> %x, <32 x i8> %y, <32
;
; AVX512VL-LABEL: blend_splatmax_mask_cond_v32i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0
@@ -941,7 +941,7 @@ define <16 x i16> @blend_splat_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <1
;
; AVX512VL-LABEL: blend_splat_mask_cond_v16i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0
@@ -980,7 +980,7 @@ define <16 x i8> @blend_splat_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x
;
; AVX512VL-LABEL: blend_splat_mask_cond_v16i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $202, %xmm2, %xmm1, %xmm0