[llvm] [SLP] Make getSameOpcode support interchangeable instructions. (PR #132887)
Han-Kuan Chen via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 25 03:08:07 PDT 2025
https://github.com/HanKuanChen updated https://github.com/llvm/llvm-project/pull/132887
>From af8dc83eeddcbbb195c020f14964dfab109d2c28 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Tue, 25 Mar 2025 08:24:46 +0800
Subject: [PATCH 1/3] [SLP] Make getSameOpcode support interchangeable
instructions. (#127450)
We use the term "interchangeable instructions" to refer to different
operators that have the same meaning (e.g., `add x, 0` is equivalent to
`mul x, 1`).
Non-constant values are not supported, as they may incur high costs with
little benefit.
---------
Co-authored-by: Alexey Bataev <a.bataev at gmx.com>
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 395 ++++++++++++++++--
.../SLPVectorizer/AArch64/vec3-base.ll | 8 +-
...reversed-strided-node-with-external-ptr.ll | 7 +-
.../SLPVectorizer/RISCV/vec3-base.ll | 8 +-
.../SLPVectorizer/X86/barriercall.ll | 4 +-
.../X86/bottom-to-top-reorder.ll | 11 +-
.../buildvector-postpone-for-dependency.ll | 8 +-
.../SLPVectorizer/X86/bv-shuffle-mask.ll | 4 +-
.../X86/extract-scalar-from-undef.ll | 28 +-
.../SLPVectorizer/X86/extractcost.ll | 4 +-
...gathered-delayed-nodes-with-reused-user.ll | 34 +-
.../X86/minbitwidth-drop-wrapping-flags.ll | 4 +-
.../X86/multi-extracts-bv-combined.ll | 6 +-
.../non-scheduled-inst-reused-as-last-inst.ll | 44 +-
.../SLPVectorizer/X86/propagate_ir_flags.ll | 12 +-
.../reduced-val-vectorized-in-transform.ll | 6 +-
.../X86/reorder_diamond_match.ll | 4 +-
.../X86/shuffle-mask-emission.ll | 8 +-
.../Transforms/SLPVectorizer/X86/vec3-base.ll | 19 +-
.../X86/vect_copyable_in_binops.ll | 8 +-
.../alternate-opcode-sindle-bv.ll | 35 +-
.../Transforms/SLPVectorizer/isOpcodeOrAlt.ll | 36 ++
.../resized-alt-shuffle-after-minbw.ll | 4 +-
.../SLPVectorizer/shuffle-mask-resized.ll | 4 +-
24 files changed, 517 insertions(+), 184 deletions(-)
create mode 100644 llvm/test/Transforms/SLPVectorizer/isOpcodeOrAlt.ll
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 95d697bbd734a..84ed1c11df9e0 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -598,6 +598,28 @@ static std::optional<unsigned> getElementIndex(const Value *Inst,
return Index;
}
+/// \returns true if all of the values in \p VL use the same opcode.
+/// For comparison instructions, also checks if predicates match.
+/// PoisonValues are considered matching.
+/// Interchangeable instructions are not considered.
+static bool allSameOpcode(ArrayRef<Value *> VL) {
+ auto *It = find_if(VL, IsaPred<Instruction>);
+ if (It == VL.end())
+ return true;
+ Instruction *MainOp = cast<Instruction>(*It);
+ unsigned Opcode = MainOp->getOpcode();
+ bool IsCmpOp = isa<CmpInst>(MainOp);
+ CmpInst::Predicate BasePred = IsCmpOp ? cast<CmpInst>(MainOp)->getPredicate()
+ : CmpInst::BAD_ICMP_PREDICATE;
+ return std::all_of(It, VL.end(), [&](Value *V) {
+ if (auto *CI = dyn_cast<CmpInst>(V))
+ return BasePred == CI->getPredicate();
+ if (auto *I = dyn_cast<Instruction>(V))
+ return I->getOpcode() == Opcode;
+ return isa<PoisonValue>(V);
+ });
+}
+
namespace {
/// Specifies the way the mask should be analyzed for undefs/poisonous elements
/// in the shuffle mask.
@@ -813,6 +835,301 @@ static std::optional<unsigned> getExtractIndex(const Instruction *E) {
}
namespace {
+/// \returns true if \p Opcode is allowed as part of the main/alternate
+/// instruction for SLP vectorization.
+///
+/// Example of unsupported opcode is SDIV that can potentially cause UB if the
+/// "shuffled out" lane would result in division by zero.
+bool isValidForAlternation(unsigned Opcode) {
+ return !Instruction::isIntDivRem(Opcode);
+}
+
+/// Helper class that determines whether VL can use the same opcode.
+/// Alternate instruction is supported. In addition, it supports interchangeable
+/// instruction. An interchangeable instruction is an instruction that can be
+/// converted to another instruction with same semantics. For example, x << 1 is
+/// equal to x * 2. x * 1 is equal to x | 0.
+class BinOpSameOpcodeHelper {
+ using MaskType = std::uint_fast16_t;
+ /// Sort SupportedOp because it is used by binary_search.
+ constexpr static std::initializer_list<unsigned> SupportedOp = {
+ Instruction::Add, Instruction::Sub, Instruction::Mul, Instruction::Shl,
+ Instruction::AShr, Instruction::And, Instruction::Or, Instruction::Xor};
+ enum : MaskType {
+ ShlBIT = 0b1,
+ AShrBIT = 0b10,
+ MulBIT = 0b100,
+ AddBIT = 0b1000,
+ SubBIT = 0b10000,
+ AndBIT = 0b100000,
+ OrBIT = 0b1000000,
+ XorBIT = 0b10000000,
+ MainOpBIT = 0b100000000,
+ LLVM_MARK_AS_BITMASK_ENUM(MainOpBIT)
+ };
+ /// Return a non-null ConstantInt if either operand of I is a ConstantInt.
+ /// The second return value represents the operand position. We check the
+ /// right-hand side first (1). If the right hand side is not a ConstantInt and
+ /// the instruction is neither Sub, Shl, nor AShr, we then check the left hand
+ /// side (0).
+ static std::pair<ConstantInt *, unsigned>
+ isBinOpWithConstantInt(Instruction *I) {
+ unsigned Opcode = I->getOpcode();
+ assert(binary_search(SupportedOp, Opcode) && "Unsupported opcode.");
+ (void)SupportedOp;
+ auto *BinOp = cast<BinaryOperator>(I);
+ if (auto *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1)))
+ return {CI, 1};
+ if (Opcode == Instruction::Sub || Opcode == Instruction::Shl ||
+ Opcode == Instruction::AShr)
+ return {nullptr, 0};
+ if (auto *CI = dyn_cast<ConstantInt>(BinOp->getOperand(0)))
+ return {CI, 0};
+ return {nullptr, 0};
+ }
+ struct InterchangeableInfo {
+ Instruction *I = nullptr;
+ /// Each set bit represents an opcode that MainOp can be converted to.
+ MaskType Mask = MainOpBIT | XorBIT | OrBIT | AndBIT | SubBIT | AddBIT |
+ MulBIT | AShrBIT | ShlBIT;
+ /// We cannot create an interchangeable instruction that does not exist in
+ /// VL. For example, VL [x + 0, y * 1] can be converted to [x << 0, y << 0],
+ /// but << does not exist in VL. In the end, we convert VL to [x * 1, y *
+ /// 1]. SeenBefore is used to know what operations have been seen before.
+ MaskType SeenBefore = 0;
+ InterchangeableInfo(Instruction *I) : I(I) {}
+ /// Returning false allows BinOpSameOpcodeHelper to find an alternate
+ /// instruction. Directly setting the mask will destroy the mask state,
+ /// preventing us from determining which instruction it should convert to.
+ bool trySet(MaskType OpcodeInMaskForm, MaskType InterchangeableMask) {
+ if (Mask & InterchangeableMask) {
+ SeenBefore |= OpcodeInMaskForm;
+ Mask &= InterchangeableMask;
+ return true;
+ }
+ return false;
+ }
+ bool equal(unsigned Opcode) {
+ if (Opcode == I->getOpcode())
+ return trySet(MainOpBIT, MainOpBIT);
+ return false;
+ }
+ unsigned getOpcode() const {
+ MaskType Candidate = Mask & SeenBefore;
+ if (Candidate & MainOpBIT)
+ return I->getOpcode();
+ if (Candidate & ShlBIT)
+ return Instruction::Shl;
+ if (Candidate & AShrBIT)
+ return Instruction::AShr;
+ if (Candidate & MulBIT)
+ return Instruction::Mul;
+ if (Candidate & AddBIT)
+ return Instruction::Add;
+ if (Candidate & SubBIT)
+ return Instruction::Sub;
+ if (Candidate & AndBIT)
+ return Instruction::And;
+ if (Candidate & OrBIT)
+ return Instruction::Or;
+ if (Candidate & XorBIT)
+ return Instruction::Xor;
+ llvm_unreachable("Cannot find interchangeable instruction.");
+ }
+ SmallVector<Value *> getOperand(Instruction *To) const {
+ unsigned ToOpcode = To->getOpcode();
+ unsigned FromOpcode = I->getOpcode();
+ if (FromOpcode == ToOpcode)
+ return SmallVector<Value *>(I->operands());
+ assert(binary_search(SupportedOp, ToOpcode) && "Unsupported opcode.");
+ auto [CI, Pos] = isBinOpWithConstantInt(I);
+ const APInt &FromCIValue = CI->getValue();
+ unsigned FromCIValueBitWidth = FromCIValue.getBitWidth();
+ APInt ToCIValue;
+ switch (FromOpcode) {
+ case Instruction::Shl:
+ if (ToOpcode == Instruction::Mul) {
+ ToCIValue = APInt::getOneBitSet(FromCIValueBitWidth,
+ FromCIValue.getZExtValue());
+ } else {
+ assert(FromCIValue.isZero() && "Cannot convert the instruction.");
+ ToCIValue = ToOpcode == Instruction::And
+ ? APInt::getAllOnes(FromCIValueBitWidth)
+ : APInt::getZero(FromCIValueBitWidth);
+ }
+ break;
+ case Instruction::Mul:
+ assert(FromCIValue.isPowerOf2() && "Cannot convert the instruction.");
+ if (ToOpcode == Instruction::Shl) {
+ ToCIValue = APInt(FromCIValueBitWidth, FromCIValue.logBase2());
+ } else {
+ assert(FromCIValue.isOne() && "Cannot convert the instruction.");
+ ToCIValue = ToOpcode == Instruction::And
+ ? APInt::getAllOnes(FromCIValueBitWidth)
+ : APInt::getZero(FromCIValueBitWidth);
+ }
+ break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ if (FromCIValue.isZero()) {
+ ToCIValue = APInt::getZero(FromCIValueBitWidth);
+ } else {
+ assert(is_contained({Instruction::Add, Instruction::Sub}, ToOpcode) &&
+ "Cannot convert the instruction.");
+ ToCIValue = FromCIValue;
+ ToCIValue.negate();
+ }
+ break;
+ case Instruction::And:
+ assert(FromCIValue.isAllOnes() && "Cannot convert the instruction.");
+ ToCIValue = ToOpcode == Instruction::Mul
+ ? APInt::getOneBitSet(FromCIValueBitWidth, 0)
+ : APInt::getZero(FromCIValueBitWidth);
+ break;
+ default:
+ assert(FromCIValue.isZero() && "Cannot convert the instruction.");
+ ToCIValue = APInt::getZero(FromCIValueBitWidth);
+ break;
+ }
+ Value *LHS = I->getOperand(1 - Pos);
+ Constant *RHS =
+ ConstantInt::get(I->getOperand(Pos)->getType(), ToCIValue);
+ if (Pos == 1)
+ return SmallVector<Value *>({LHS, RHS});
+ return SmallVector<Value *>({RHS, LHS});
+ }
+ };
+ InterchangeableInfo MainOp;
+ InterchangeableInfo AltOp;
+ bool isValidForAlternation(Instruction *I) const {
+ return ::isValidForAlternation(MainOp.I->getOpcode()) &&
+ ::isValidForAlternation(I->getOpcode());
+ }
+ bool initializeAltOp(Instruction *I) {
+ if (AltOp.I)
+ return true;
+ if (!isValidForAlternation(I))
+ return false;
+ AltOp.I = I;
+ return true;
+ }
+
+public:
+ BinOpSameOpcodeHelper(Instruction *MainOp, Instruction *AltOp = nullptr)
+ : MainOp(MainOp), AltOp(AltOp) {
+ assert(is_sorted(SupportedOp) && "SupportedOp is not sorted.");
+ }
+ bool add(Instruction *I) {
+ assert(isa<BinaryOperator>(I) &&
+ "BinOpSameOpcodeHelper only accepts BinaryOperator.");
+ unsigned Opcode = I->getOpcode();
+ MaskType OpcodeInMaskForm;
+ // Prefer Shl, AShr, Mul, Add, Sub, And, Or and Xor over MainOp.
+ switch (Opcode) {
+ case Instruction::Shl:
+ OpcodeInMaskForm = ShlBIT;
+ break;
+ case Instruction::AShr:
+ OpcodeInMaskForm = AShrBIT;
+ break;
+ case Instruction::Mul:
+ OpcodeInMaskForm = MulBIT;
+ break;
+ case Instruction::Add:
+ OpcodeInMaskForm = AddBIT;
+ break;
+ case Instruction::Sub:
+ OpcodeInMaskForm = SubBIT;
+ break;
+ case Instruction::And:
+ OpcodeInMaskForm = AndBIT;
+ break;
+ case Instruction::Or:
+ OpcodeInMaskForm = OrBIT;
+ break;
+ case Instruction::Xor:
+ OpcodeInMaskForm = XorBIT;
+ break;
+ default:
+ return MainOp.equal(Opcode) ||
+ (initializeAltOp(I) && AltOp.equal(Opcode));
+ }
+ MaskType InterchangeableMask = OpcodeInMaskForm;
+ ConstantInt *CI = isBinOpWithConstantInt(I).first;
+ if (CI) {
+ constexpr MaskType CanBeAll =
+ XorBIT | OrBIT | AndBIT | SubBIT | AddBIT | MulBIT | AShrBIT | ShlBIT;
+ const APInt &CIValue = CI->getValue();
+ switch (Opcode) {
+ case Instruction::Shl:
+ InterchangeableMask = CIValue.isZero() ? CanBeAll : MulBIT | ShlBIT;
+ break;
+ case Instruction::Mul:
+ if (CIValue.isOne()) {
+ InterchangeableMask = CanBeAll;
+ break;
+ }
+ if (CIValue.isPowerOf2())
+ InterchangeableMask = MulBIT | ShlBIT;
+ break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ InterchangeableMask = CIValue.isZero() ? CanBeAll : SubBIT | AddBIT;
+ break;
+ case Instruction::And:
+ if (CIValue.isAllOnes())
+ InterchangeableMask = CanBeAll;
+ break;
+ default:
+ if (CIValue.isZero())
+ InterchangeableMask = CanBeAll;
+ break;
+ }
+ }
+ return MainOp.trySet(OpcodeInMaskForm, InterchangeableMask) ||
+ (initializeAltOp(I) &&
+ AltOp.trySet(OpcodeInMaskForm, InterchangeableMask));
+ }
+ unsigned getMainOpcode() const { return MainOp.getOpcode(); }
+ bool hasAltOp() const { return AltOp.I; }
+ unsigned getAltOpcode() const {
+ return hasAltOp() ? AltOp.getOpcode() : getMainOpcode();
+ }
+ SmallVector<Value *> getMainOperand(Instruction *I) const {
+ return MainOp.getOperand(I);
+ }
+ SmallVector<Value *> getAltOperand(Instruction *I) const {
+ return AltOp.getOperand(I);
+ }
+};
+
+bool isConvertible(Instruction *I, Instruction *MainOp, Instruction *AltOp) {
+ assert(MainOp && "MainOp cannot be nullptr.");
+ if (I->getOpcode() == MainOp->getOpcode())
+ return true;
+ assert(AltOp && "AltOp cannot be nullptr.");
+ if (I->getOpcode() == AltOp->getOpcode())
+ return true;
+ if (!I->isBinaryOp())
+ return false;
+ BinOpSameOpcodeHelper Converter(MainOp, AltOp);
+ return Converter.add(I) && Converter.add(MainOp) && Converter.add(AltOp);
+}
+
+std::pair<Instruction *, SmallVector<Value *>>
+convertTo(Instruction *I, Instruction *MainOp, Instruction *AltOp) {
+ assert(isConvertible(I, MainOp, AltOp) && "Cannot convert the instruction.");
+ if (I->getOpcode() == MainOp->getOpcode())
+ return std::make_pair(MainOp, SmallVector<Value *>(I->operands()));
+ // Prefer AltOp instead of interchangeable instruction of MainOp.
+ if (I->getOpcode() == AltOp->getOpcode())
+ return std::make_pair(AltOp, SmallVector<Value *>(I->operands()));
+ assert(I->isBinaryOp() && "Cannot convert the instruction.");
+ BinOpSameOpcodeHelper Converter(I);
+ if (Converter.add(I) && Converter.add(MainOp) && !Converter.hasAltOp())
+ return std::make_pair(MainOp, Converter.getMainOperand(MainOp));
+ return std::make_pair(AltOp, Converter.getAltOperand(AltOp));
+}
/// Main data required for vectorization of instructions.
class InstructionsState {
@@ -840,8 +1157,7 @@ class InstructionsState {
bool isAltShuffle() const { return getMainOp() != getAltOp(); }
bool isOpcodeOrAlt(Instruction *I) const {
- unsigned CheckedOpcode = I->getOpcode();
- return getOpcode() == CheckedOpcode || getAltOpcode() == CheckedOpcode;
+ return isConvertible(I, MainOp, AltOp);
}
/// Checks if main/alt instructions are shift operations.
@@ -886,18 +1202,6 @@ class InstructionsState {
} // end anonymous namespace
-/// \returns true if \p Opcode is allowed as part of the main/alternate
-/// instruction for SLP vectorization.
-///
-/// Example of unsupported opcode is SDIV that can potentially cause UB if the
-/// "shuffled out" lane would result in division by zero.
-static bool isValidForAlternation(unsigned Opcode) {
- if (Instruction::isIntDivRem(Opcode))
- return false;
-
- return true;
-}
-
static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
const TargetLibraryInfo &TLI);
@@ -955,6 +1259,17 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
(VL.size() == 2 && InstCnt < 2))
return InstructionsState::invalid();
+ auto FindInstructionWithOpcode = [&](unsigned Opcode) {
+ for (Value *V : VL) {
+ if (isa<PoisonValue>(V))
+ continue;
+ auto *Inst = cast<Instruction>(V);
+ if (Inst->getOpcode() == Opcode)
+ return Inst;
+ }
+ llvm_unreachable("Opcode not found.");
+ };
+
bool IsCastOp = isa<CastInst>(MainOp);
bool IsBinOp = isa<BinaryOperator>(MainOp);
bool IsCmpOp = isa<CmpInst>(MainOp);
@@ -964,6 +1279,7 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
unsigned Opcode = MainOp->getOpcode();
unsigned AltOpcode = Opcode;
+ BinOpSameOpcodeHelper BinOpHelper(MainOp);
bool SwappedPredsCompatible = IsCmpOp && [&]() {
SetVector<unsigned> UniquePreds, UniqueNonSwappedPreds;
UniquePreds.insert(BasePred);
@@ -1010,14 +1326,8 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
return InstructionsState::invalid();
unsigned InstOpcode = I->getOpcode();
if (IsBinOp && isa<BinaryOperator>(I)) {
- if (InstOpcode == Opcode || InstOpcode == AltOpcode)
+ if (BinOpHelper.add(I))
continue;
- if (Opcode == AltOpcode && isValidForAlternation(InstOpcode) &&
- isValidForAlternation(Opcode)) {
- AltOpcode = InstOpcode;
- AltOp = I;
- continue;
- }
} else if (IsCastOp && isa<CastInst>(I)) {
Value *Op0 = MainOp->getOperand(0);
Type *Ty0 = Op0->getType();
@@ -1118,6 +1428,12 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
return InstructionsState::invalid();
}
+ if (IsBinOp) {
+ MainOp = FindInstructionWithOpcode(BinOpHelper.getMainOpcode());
+ AltOp = FindInstructionWithOpcode(BinOpHelper.getAltOpcode());
+ }
+ assert((MainOp == AltOp || !allSameOpcode(VL)) &&
+ "Incorrect implementation of allSameOpcode.");
return InstructionsState(MainOp, AltOp);
}
@@ -2520,11 +2836,12 @@ class BoUpSLP {
// Since operand reordering is performed on groups of commutative
// operations or alternating sequences (e.g., +, -), we can safely tell
// the inverse operations by checking commutativity.
- bool IsInverseOperation = !isCommutative(cast<Instruction>(V));
+ auto [SelectedOp, Ops] =
+ convertTo(cast<Instruction>(VL[Lane]), MainOp, S.getAltOp());
+ bool IsInverseOperation = !isCommutative(SelectedOp);
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
bool APO = (OpIdx == 0) ? false : IsInverseOperation;
- OpsVec[OpIdx][Lane] = {cast<Instruction>(V)->getOperand(OpIdx), APO,
- false};
+ OpsVec[OpIdx][Lane] = {Ops[OpIdx], APO, false};
}
}
}
@@ -10735,7 +11052,9 @@ void BoUpSLP::transformNodes() {
// same opcode and same parent block or all constants.
if (VL.size() <= 2 || LoadEntriesToVectorize.contains(Idx) ||
!(!E.hasState() || E.getOpcode() == Instruction::Load ||
- E.isAltShuffle() || !allSameBlock(VL)) ||
+ // We use allSameOpcode instead of isAltShuffle because we don't
+ // want to use interchangeable instruction here.
+ !allSameOpcode(VL) || !allSameBlock(VL)) ||
allConstant(VL) || isSplat(VL))
continue;
if (ForceLoadGather && E.hasState() && E.getOpcode() == Instruction::Load)
@@ -10780,7 +11099,7 @@ void BoUpSLP::transformNodes() {
if (IsSplat)
continue;
InstructionsState S = getSameOpcode(Slice, *TLI);
- if (!S || S.isAltShuffle() || !allSameBlock(Slice) ||
+ if (!S || !allSameOpcode(Slice) || !allSameBlock(Slice) ||
(S.getOpcode() == Instruction::Load &&
areKnownNonVectorizableLoads(Slice)) ||
(S.getOpcode() != Instruction::Load &&
@@ -12583,14 +12902,22 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
if (isa<PoisonValue>(UniqueValues[Idx]))
return InstructionCost(TTI::TCC_Free);
- auto *VI = cast<Instruction>(UniqueValues[Idx]);
- unsigned OpIdx = isa<UnaryOperator>(VI) ? 0 : 1;
- TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(VI->getOperand(0));
- TTI::OperandValueInfo Op2Info =
- TTI::getOperandInfo(VI->getOperand(OpIdx));
- SmallVector<const Value *> Operands(VI->operand_values());
+ // We cannot retrieve the operand from UniqueValues[Idx] because an
+ // interchangeable instruction may be used. The order and the actual
+ // operand might differ from what is retrieved from UniqueValues[Idx].
+ Value *Op1 = E->getOperand(0)[Idx];
+ Value *Op2;
+ SmallVector<const Value *, 2> Operands(1, Op1);
+ if (isa<UnaryOperator>(UniqueValues[Idx])) {
+ Op2 = Op1;
+ } else {
+ Op2 = E->getOperand(1)[Idx];
+ Operands.push_back(Op2);
+ }
+ TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(Op1);
+ TTI::OperandValueInfo Op2Info = TTI::getOperandInfo(Op2);
return TTI->getArithmeticInstrCost(ShuffleOrOp, OrigScalarTy, CostKind,
- Op1Info, Op2Info, Operands, VI);
+ Op1Info, Op2Info, Operands);
};
auto GetVectorCost = [=](InstructionCost CommonCost) {
if (ShuffleOrOp == Instruction::And && It != MinBWs.end()) {
@@ -17071,7 +17398,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *V = Builder.CreateBinOp(
static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS,
RHS);
- propagateIRFlags(V, E->Scalars, VL0, It == MinBWs.end());
+ propagateIRFlags(V, E->Scalars, nullptr, It == MinBWs.end());
if (auto *I = dyn_cast<Instruction>(V)) {
V = ::propagateMetadata(I, E->Scalars);
// Drop nuw flags for abs(sub(commutative), true).
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll
index feb4ad865f314..d527d38adbee3 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll
@@ -314,10 +314,10 @@ define void @store_try_reorder(ptr %dst) {
;
; POW2-ONLY-LABEL: @store_try_reorder(
; POW2-ONLY-NEXT: entry:
-; POW2-ONLY-NEXT: [[ADD:%.*]] = add i32 0, 0
-; POW2-ONLY-NEXT: store i32 [[ADD]], ptr [[DST:%.*]], align 4
-; POW2-ONLY-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
-; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
+; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887:%.*]], align 4
+; POW2-ONLY-NEXT: [[ADD216:%.*]] = sub i32 0, 0
+; POW2-ONLY-NEXT: [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[ARRAYIDX_I1887]], i64 2
+; POW2-ONLY-NEXT: store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4
; POW2-ONLY-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll
index fd3d4ab80b29c..ff897180cc9b7 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll
@@ -7,13 +7,12 @@ define void @test(ptr %a, i64 %0) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[A]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[TMP0]], i32 0
; CHECK-NEXT: br label %[[BB:.*]]
; CHECK: [[BB]]:
-; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i64 [[TMP0]], 1
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 0, i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = or disjoint <2 x i64> [[TMP3]], <i64 1, i64 0>
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr double, <2 x ptr> [[TMP2]], <2 x i64> [[TMP5]]
-; CHECK-NEXT: [[ARRAYIDX17_I28_1:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP3]]
+; CHECK-NEXT: [[ARRAYIDX17_I28_1:%.*]] = extractelement <2 x ptr> [[TMP6]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[TMP6]], i32 8, <2 x i1> splat (i1 true), <2 x double> poison)
; CHECK-NEXT: [[TMP8:%.*]] = load <2 x double>, ptr [[A]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = load <2 x double>, ptr [[A]], align 8
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll
index 7ab5e4d6cb787..481d586e6658a 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll
@@ -324,10 +324,10 @@ define void @store_try_reorder(ptr %dst) {
;
; POW2-ONLY-LABEL: @store_try_reorder(
; POW2-ONLY-NEXT: entry:
-; POW2-ONLY-NEXT: [[ADD:%.*]] = add i32 0, 0
-; POW2-ONLY-NEXT: store i32 [[ADD]], ptr [[DST:%.*]], align 4
-; POW2-ONLY-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
-; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
+; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887:%.*]], align 4
+; POW2-ONLY-NEXT: [[ADD216:%.*]] = sub i32 0, 0
+; POW2-ONLY-NEXT: [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[ARRAYIDX_I1887]], i64 2
+; POW2-ONLY-NEXT: store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4
; POW2-ONLY-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll b/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll
index f46a5d84a86cc..a39e602e2da71 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll
@@ -10,9 +10,7 @@ define i32 @foo(ptr nocapture %A, i32 %n) {
; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (...) @bar()
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
-; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 8, i32 10>
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], splat (i32 9)
; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[A:%.*]], align 4
; CHECK-NEXT: ret i32 undef
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll
index 889f5a95c81d6..299677ca80b34 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll
@@ -10,15 +10,10 @@ define void @test(ptr %0, ptr %1, ptr %2) {
; CHECK-NEXT: [[TMP11:%.*]] = sub <4 x i32> <i32 0, i32 0, i32 undef, i32 0>, [[TMP8]]
; CHECK-NEXT: [[TMP12:%.*]] = sub <4 x i32> [[TMP11]], [[TMP10]]
; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP12]], [[TMP6]]
-; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i32> [[TMP13]], <i32 0, i32 0, i32 1, i32 0>
-; CHECK-NEXT: [[TMP15:%.*]] = sub <4 x i32> [[TMP13]], <i32 0, i32 0, i32 1, i32 0>
-; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], <4 x i32> <i32 2, i32 0, i32 1, i32 7>
+; CHECK-NEXT: [[TMP16:%.*]] = add <4 x i32> <i32 0, i32 0, i32 1, i32 0>, [[TMP13]]
; CHECK-NEXT: [[TMP17:%.*]] = add <4 x i32> [[TMP16]], zeroinitializer
-; CHECK-NEXT: [[TMP18:%.*]] = sub <4 x i32> [[TMP16]], zeroinitializer
-; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> [[TMP18]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP20:%.*]] = add <4 x i32> [[TMP19]], zeroinitializer
-; CHECK-NEXT: [[TMP21:%.*]] = sub <4 x i32> [[TMP19]], zeroinitializer
-; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[TMP20]], <4 x i32> [[TMP21]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i32> [[TMP17]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
; CHECK-NEXT: store <4 x i32> [[TMP22]], ptr [[TMP2:%.*]], align 4
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll
index 43c42c1ea2bfb..03a89e54e4212 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll
@@ -8,15 +8,13 @@ define void @test() {
; CHECK: [[BB1:.*]]:
; CHECK-NEXT: br label %[[BB2:.*]]
; CHECK: [[BB2]]:
-; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ poison, %[[BB1]] ], [ [[TMP5:%.*]], %[[BB6]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ poison, %[[BB1]] ], [ [[TMP4:%.*]], %[[BB6]] ]
; CHECK-NEXT: ret void
; CHECK: [[BB6]]:
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP8:%.*]], %[[BB6]] ]
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> <i32 0, i32 0, i32 poison, i32 poison>, <4 x i32> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 5, i32 4>
-; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i32> zeroinitializer, [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> zeroinitializer, [[TMP2]]
-; CHECK-NEXT: [[TMP5]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> <i32 0, i32 0, i32 poison, i32 poison>, <4 x i32> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 5, i32 4>
+; CHECK-NEXT: [[TMP4]] = mul <4 x i32> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1>
; CHECK-NEXT: [[TMP8]] = mul <2 x i32> zeroinitializer, [[TMP7]]
; CHECK-NEXT: br i1 false, label %[[BB2]], label %[[BB6]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bv-shuffle-mask.ll b/llvm/test/Transforms/SLPVectorizer/X86/bv-shuffle-mask.ll
index 766916fe71f35..c4ddc5d63cc04 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/bv-shuffle-mask.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/bv-shuffle-mask.ll
@@ -7,14 +7,12 @@ define i16 @test(i16 %v1, i16 %v2) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> <i16 0, i16 0, i16 0, i16 poison>, i16 [[V2]], i32 3
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> <i16 0, i16 0, i16 0, i16 poison>, i16 [[V1]], i32 3
-; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i16> [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i16> [[TMP0]], [[TMP1]]
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> <i32 poison, i32 3>
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i16> [[TMP5]], i16 [[V1]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i16> [[TMP6]], <2 x i16> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i16> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i16> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i16> [[TMP9]], zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <4 x i16> [[TMP10]], zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = or <4 x i1> [[TMP11]], zeroinitializer
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll
index 1c62e57edfc46..514d5f974cb16 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll
@@ -4,17 +4,13 @@
define i64 @foo(i32 %tmp7) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: bb:
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[TMP5:%.*]], i32 2
-; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[TMP2]], zeroinitializer
-; CHECK-NEXT: [[TMP24:%.*]] = sub i32 undef, 0
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 undef, i32 0>, i32 [[TMP24]], i32 4
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> [[TMP0]], i32 0, i32 5
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x i32> <i32 poison, i32 poison, i32 undef, i32 poison, i32 poison, i32 undef, i32 poison, i32 undef>, i32 [[TMP24]], i32 6
-; CHECK-NEXT: [[TMP12:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> [[TMP3]], i64 0)
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP12]], <8 x i32> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 poison, i32 2, i32 3, i32 poison, i32 14, i32 poison>
-; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i32> [[TMP1]], [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <8 x i32> [[TMP1]], [[TMP4]]
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> [[TMP6]], <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 4, i32 5, i32 14, i32 15>
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> <i32 0, i32 0, i32 poison, i32 poison, i32 0, i32 poison, i32 poison, i32 poison>, i32 [[TMP8:%.*]], i32 3
+; CHECK-NEXT: [[TMP4:%.*]] = sub <8 x i32> [[TMP0]], <i32 0, i32 0, i32 poison, i32 0, i32 0, i32 poison, i32 0, i32 poison>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 0>, <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 14, i32 poison, i32 poison, i32 7>
+; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x i32> [[TMP2]], i32 0, i32 5
+; CHECK-NEXT: [[TMP5:%.*]] = sub nsw <8 x i32> [[TMP13]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = add nsw <8 x i32> [[TMP13]], [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> [[TMP6]], <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 12, i32 13, i32 6, i32 7>
; CHECK-NEXT: [[TMP8:%.*]] = add <8 x i32> zeroinitializer, [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i32> [[TMP8]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP9]])
@@ -29,7 +25,7 @@ bb:
%tmp4 = xor i32 %tmp3, 0
%tmp6 = sub i32 0, 0
%tmp8 = sub i32 %tmp7, 0
- %tmp9 = sub nsw i32 0, undef
+ %tmp9 = sub nsw i32 0, poison
%tmp10 = add nsw i32 0, %tmp6
%tmp11 = sub nsw i32 0, %tmp8
%tmp12 = add i32 0, %tmp10
@@ -44,10 +40,10 @@ bb:
%tmp21 = add i32 %tmp20, %tmp17
%tmp22 = sub i32 0, 0
%tmp23 = add i32 0, 0
- %tmp24 = sub i32 undef, 0
- %tmp25 = add nsw i32 %tmp23, undef
+ %tmp24 = sub i32 poison, 0
+ %tmp25 = add nsw i32 %tmp23, poison
%tmp26 = add nsw i32 %tmp24, %tmp22
- %tmp27 = sub nsw i32 undef, %tmp24
+ %tmp27 = sub nsw i32 poison, %tmp24
%tmp28 = add i32 0, %tmp25
%tmp29 = xor i32 %tmp28, 0
%tmp30 = add i32 0, %tmp26
@@ -58,7 +54,7 @@ bb:
%tmp35 = add i32 %tmp34, %tmp29
%tmp36 = add i32 %tmp35, 0
%tmp37 = add i32 %tmp36, %tmp33
- %tmp38 = sub nsw i32 0, undef
+ %tmp38 = sub nsw i32 0, poison
%tmp39 = add i32 0, %tmp38
%tmp40 = xor i32 %tmp39, 0
%tmp41 = add i32 0, %tmp37
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll
index 02c3173adc654..c6f5308cf54aa 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll
@@ -9,9 +9,7 @@ define i32 @foo(ptr nocapture %A, i32 %n, i32 %m) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
-; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 8, i32 10>
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], splat (i32 9)
; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[A:%.*]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll b/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
index b39480b12496b..5a9ea0d292fa0 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
@@ -6,26 +6,26 @@
define i64 @foo() {
; CHECK-LABEL: define i64 @foo() {
; CHECK-NEXT: bb:
+; CHECK-NEXT: [[ADD7:%.*]] = add i64 0, 0
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb1:
-; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[ADD:%.*]], [[BB3]] ]
-; CHECK-NEXT: [[PHI2:%.*]] = phi i64 [ [[TMP9:%.*]], [[BB3]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i64> [ [[TMP5:%.*]], [[BB3]] ]
; CHECK-NEXT: ret i64 0
; CHECK: bb3:
-; CHECK-NEXT: [[PHI5:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ 0, [[BB3]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i64> [ zeroinitializer, [[BB]] ], [ [[TMP7:%.*]], [[BB3]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
-; CHECK-NEXT: [[ADD]] = add i64 [[TMP3]], [[TMP2]]
-; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 0
-; CHECK-NEXT: [[TMP9]] = or i64 [[PHI5]], 0
-; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[TMP9]], 0
-; CHECK-NEXT: [[TMP7]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[ADD]], i32 0
+; CHECK-NEXT: [[PHI4:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ 0, [[BB3]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i64> [ zeroinitializer, [[BB]] ], [ [[TMP3:%.*]], [[BB3]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> <i64 poison, i64 0>, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[PHI4]], i32 0
+; CHECK-NEXT: [[TMP3]] = add <2 x i64> [[TMP4]], [[TMP2]]
+; CHECK-NEXT: [[TMP5]] = add <2 x i64> [[TMP0]], [[TMP2]]
+; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[ADD7]]
+; CHECK-NEXT: [[OR:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
+; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[OR]], 0
; CHECK-NEXT: br i1 false, label [[BB3]], label [[BB1:%.*]]
;
; FORCED-LABEL: define i64 @foo() {
; FORCED-NEXT: bb:
-; FORCED-NEXT: [[TMP8:%.*]] = add i64 0, 0
+; FORCED-NEXT: [[ADD7:%.*]] = add i64 0, 0
; FORCED-NEXT: br label [[BB3:%.*]]
; FORCED: bb1:
; FORCED-NEXT: [[TMP0:%.*]] = phi <2 x i64> [ [[TMP5:%.*]], [[BB3]] ]
@@ -36,12 +36,10 @@ define i64 @foo() {
; FORCED-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> <i64 poison, i64 0>, <2 x i32> <i32 0, i32 3>
; FORCED-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[PHI5]], i32 0
; FORCED-NEXT: [[TMP7]] = add <2 x i64> [[TMP6]], [[TMP2]]
-; FORCED-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
-; FORCED-NEXT: [[TMP4:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
-; FORCED-NEXT: [[TMP5]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i32> <i32 0, i32 3>
-; FORCED-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[TMP8]]
-; FORCED-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
-; FORCED-NEXT: [[ICMP:%.*]] = icmp ult i64 [[TMP9]], 0
+; FORCED-NEXT: [[TMP5]] = add <2 x i64> [[TMP1]], [[TMP2]]
+; FORCED-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[ADD7]]
+; FORCED-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
+; FORCED-NEXT: [[ICMP:%.*]] = icmp ult i64 [[TMP8]], 0
; FORCED-NEXT: br i1 false, label [[BB3]], label [[BB1:%.*]]
;
bb:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll
index 2a5bfa7390770..0198b1c5cb846 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll
@@ -9,9 +9,7 @@ define i32 @test() {
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i8> poison, i8 [[A_PROMOTED]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i8> [[TMP1]], zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i8> [[TMP1]], zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
-; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i16>
+; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i16>
; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i16> [[TMP5]], <i16 0, i16 -1, i16 0, i16 0>
; CHECK-NEXT: [[TMP7:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = zext i16 [[TMP7]] to i32
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll
index e6a166c27ac49..230e165e43edc 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll
@@ -8,10 +8,8 @@ define i32 @foo() {
; CHECK-NEXT: [[D:%.*]] = load i32, ptr null, align 4
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 undef, i32 1, i32 0>, i32 [[D]], i32 1
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 1>
-; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i32> zeroinitializer, [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> zeroinitializer, [[TMP1]]
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 5, i32 6, i32 7>
-; CHECK-NEXT: store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 15), align 4
+; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i32> zeroinitializer, [[TMP1]]
+; CHECK-NEXT: store <8 x i32> [[TMP2]], ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 15), align 4
; CHECK-NEXT: ret i32 0
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll
index 1163c8219dabe..034fe82862950 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll
@@ -4,6 +4,24 @@
; RUN: -slp-skip-early-profitability-check < %s | FileCheck %s --check-prefixes=FORCED
define void @foo() {
+; CHECK-LABEL: define void @foo() {
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 0, i32 0
+; CHECK-NEXT: br label [[BB1:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, [[BB:%.*]] ], [ [[TMP6:%.*]], [[BB4:%.*]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i32> [[TMP1]], [[TMP0]]
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP6]] = or <2 x i32> [[TMP5]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0
+; CHECK-NEXT: [[CALL:%.*]] = call i64 null(i32 [[TMP7]])
+; CHECK-NEXT: br label [[BB4]]
+; CHECK: bb4:
+; CHECK-NEXT: br i1 false, label [[BB5:%.*]], label [[BB1]]
+; CHECK: bb5:
+; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ [[TMP2]], [[BB4]] ]
+; CHECK-NEXT: ret void
+;
; FORCED-LABEL: define void @foo() {
; FORCED-NEXT: bb:
; FORCED-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 0, i32 0
@@ -11,9 +29,7 @@ define void @foo() {
; FORCED: bb1:
; FORCED-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, [[BB:%.*]] ], [ [[TMP6:%.*]], [[BB4:%.*]] ]
; FORCED-NEXT: [[TMP2:%.*]] = shl <2 x i32> [[TMP1]], [[TMP0]]
-; FORCED-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP1]], [[TMP0]]
-; FORCED-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 3>
-; FORCED-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 3>
+; FORCED-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 3>
; FORCED-NEXT: [[TMP6]] = or <2 x i32> [[TMP5]], zeroinitializer
; FORCED-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0
; FORCED-NEXT: [[CALL:%.*]] = call i64 null(i32 [[TMP7]])
@@ -21,29 +37,9 @@ define void @foo() {
; FORCED: bb4:
; FORCED-NEXT: br i1 false, label [[BB5:%.*]], label [[BB1]]
; FORCED: bb5:
-; FORCED-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ [[TMP4]], [[BB4]] ]
+; FORCED-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ [[TMP2]], [[BB4]] ]
; FORCED-NEXT: ret void
;
-; CHECK-LABEL: define void @foo() {
-; CHECK-NEXT: bb:
-; CHECK-NEXT: br label [[BB1:%.*]]
-; CHECK: bb1:
-; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, [[BB:%.*]] ], [ [[TMP6:%.*]], [[BB4:%.*]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[TMP2]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SHL]], i32 0
-; CHECK-NEXT: [[TMP6]] = or <2 x i32> [[TMP5]], zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0
-; CHECK-NEXT: [[CALL:%.*]] = call i64 null(i32 [[TMP7]])
-; CHECK-NEXT: br label [[BB4]]
-; CHECK: bb4:
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP6]], i32 1
-; CHECK-NEXT: br i1 false, label [[BB5:%.*]], label [[BB1]]
-; CHECK: bb5:
-; CHECK-NEXT: [[PHI6:%.*]] = phi i32 [ [[SHL]], [[BB4]] ]
-; CHECK-NEXT: [[PHI7:%.*]] = phi i32 [ [[TMP8]], [[BB4]] ]
-; CHECK-NEXT: ret void
-;
bb:
br label %bb1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll b/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll
index cb02f4d10923c..ad8e905a8ca02 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll
@@ -330,9 +330,7 @@ define void @only_arcp(ptr %x) {
define void @addsub_all_nsw(ptr %x) {
; CHECK-LABEL: @addsub_all_nsw(
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], splat (i32 1)
-; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[TMP2]], splat (i32 1)
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[TMP2]], <i32 1, i32 -1, i32 1, i32 -1>
; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[X]], align 4
; CHECK-NEXT: ret void
;
@@ -361,9 +359,7 @@ define void @addsub_all_nsw(ptr %x) {
define void @addsub_some_nsw(ptr %x) {
; CHECK-LABEL: @addsub_some_nsw(
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], splat (i32 1)
-; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], splat (i32 1)
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP2]], <i32 1, i32 -1, i32 1, i32 -1>
; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[X]], align 4
; CHECK-NEXT: ret void
;
@@ -392,9 +388,7 @@ define void @addsub_some_nsw(ptr %x) {
define void @addsub_no_nsw(ptr %x) {
; CHECK-LABEL: @addsub_no_nsw(
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], splat (i32 1)
-; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], splat (i32 1)
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP2]], <i32 1, i32 -1, i32 1, i32 -1>
; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[X]], align 4
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-vectorized-in-transform.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-vectorized-in-transform.ll
index 81f3bf99f3fd8..7fe6941d52da7 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-vectorized-in-transform.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-vectorized-in-transform.ll
@@ -9,16 +9,16 @@ define i32 @test(i1 %cond) {
; CHECK: [[BB]]:
; CHECK-NEXT: [[P1:%.*]] = phi i32 [ [[OR92:%.*]], %[[BB]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP8:%.*]], %[[BB]] ], [ zeroinitializer, %[[ENTRY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = or i32 1, 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> <i32 poison, i32 poison, i32 0, i32 0>, <4 x i32> <i32 poison, i32 1, i32 6, i32 7>
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[P1]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i32> zeroinitializer, [[TMP4]]
; CHECK-NEXT: [[OR92]] = or i32 1, 0
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP5]])
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> <i32 poison, i32 1>, i32 [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[OR92]], i32 0
+; CHECK-NEXT: [[TMP8]] = xor <2 x i32> [[TMP9]], [[TMP7]]
; CHECK-NEXT: [[OP_RDX:%.*]] = xor i32 [[TMP6]], [[OR92]]
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[OP_RDX]], i32 0
-; CHECK-NEXT: [[TMP8]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP1]], i32 1
; CHECK-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[BB]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret i32 [[OP_RDX]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll
index cda88620ab88a..fff2b72df613e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll
@@ -14,10 +14,8 @@ define void @test() {
; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i16> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[TMP10:%.*]] = sub <4 x i16> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT: [[TMP12:%.*]] = add <4 x i16> zeroinitializer, [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = sub <4 x i16> zeroinitializer, [[TMP11]]
-; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
-; CHECK-NEXT: [[TMP15:%.*]] = sext <4 x i16> [[TMP14]] to <4 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = sext <4 x i16> [[TMP13]] to <4 x i32>
; CHECK-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP2]], align 16
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shuffle-mask-emission.ll b/llvm/test/Transforms/SLPVectorizer/X86/shuffle-mask-emission.ll
index fcc295de62adf..a17ccb4b46ef9 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/shuffle-mask-emission.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/shuffle-mask-emission.ll
@@ -6,11 +6,9 @@ define i1 @test() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[H_PROMOTED118_I_FR:%.*]] = freeze i32 1
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[H_PROMOTED118_I_FR]], i32 2
-; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> zeroinitializer, [[TMP0]]
-; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> zeroinitializer, [[TMP0]]
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP3]], <4 x i32> <i32 2, i32 2, i32 7, i32 2>
-; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> zeroinitializer, [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> <i32 2, i32 2, i32 7, i32 2>
+; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP5]], <i32 0, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i32> [[TMP6]], <i32 1, i32 0, i32 0, i32 0>
; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]])
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll
index 6e2a43ac5f9f1..15dd6756cd7db 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll
@@ -242,13 +242,18 @@ exit:
}
define void @store_try_reorder(ptr %dst) {
-; CHECK-LABEL: @store_try_reorder(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[ADD:%.*]] = add i32 0, 0
-; CHECK-NEXT: store i32 [[ADD]], ptr [[DST:%.*]], align 4
-; CHECK-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
-; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
-; CHECK-NEXT: ret void
+; NON-POW2-LABEL: @store_try_reorder(
+; NON-POW2-NEXT: entry:
+; NON-POW2-NEXT: store <3 x i32> zeroinitializer, ptr [[DST:%.*]], align 4
+; NON-POW2-NEXT: ret void
+;
+; POW2-ONLY-LABEL: @store_try_reorder(
+; POW2-ONLY-NEXT: entry:
+; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[DST:%.*]], align 4
+; POW2-ONLY-NEXT: [[ADD216:%.*]] = sub i32 0, 0
+; POW2-ONLY-NEXT: [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[DST]], i64 2
+; POW2-ONLY-NEXT: store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4
+; POW2-ONLY-NEXT: ret void
;
entry:
%add = add i32 0, 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll b/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll
index 869a9d1aee80e..4f3d551e21122 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll
@@ -192,9 +192,7 @@ define void @addsub0(ptr noalias %dst, ptr noalias %src) {
; CHECK-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 2
; CHECK-NEXT: store i32 [[TMP1]], ptr [[INCDEC_PTR1]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[INCDEC_PTR2]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], <i32 -2, i32 -3>
-; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <2 x i32> [[TMP2]], <i32 -2, i32 -3>
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[TMP2]], <i32 -2, i32 3>
; CHECK-NEXT: store <2 x i32> [[TMP5]], ptr [[INCDEC_PTR3]], align 4
; CHECK-NEXT: ret void
;
@@ -225,9 +223,7 @@ define void @addsub1(ptr noalias %dst, ptr noalias %src) {
; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 2
; CHECK-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 2
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[SRC]], align 4
-; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i32> [[TMP0]], splat (i32 -1)
-; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <2 x i32> [[TMP0]], splat (i32 -1)
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i32> [[TMP0]], <i32 -1, i32 1>
; CHECK-NEXT: store <2 x i32> [[TMP3]], ptr [[DST]], align 4
; CHECK-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 3
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[INCDEC_PTR2]], align 4
diff --git a/llvm/test/Transforms/SLPVectorizer/alternate-opcode-sindle-bv.ll b/llvm/test/Transforms/SLPVectorizer/alternate-opcode-sindle-bv.ll
index c250029519590..9b6511d0d8284 100644
--- a/llvm/test/Transforms/SLPVectorizer/alternate-opcode-sindle-bv.ll
+++ b/llvm/test/Transforms/SLPVectorizer/alternate-opcode-sindle-bv.ll
@@ -1,18 +1,29 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
-; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s %}
-; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s %}
+; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=X86 %}
+; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=AARCH64 %}
define <2 x i32> @test(i32 %arg) {
-; CHECK-LABEL: define <2 x i32> @test(
-; CHECK-SAME: i32 [[ARG:%.*]]) {
-; CHECK-NEXT: bb:
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[ARG]], 0
-; CHECK-NEXT: [[MUL:%.*]] = mul i32 0, 1
-; CHECK-NEXT: [[MUL1:%.*]] = mul i32 [[OR]], [[MUL]]
-; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 0, [[MUL1]]
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[OR]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[MUL]], i32 1
-; CHECK-NEXT: ret <2 x i32> [[TMP1]]
+; X86-LABEL: define <2 x i32> @test(
+; X86-SAME: i32 [[ARG:%.*]]) {
+; X86-NEXT: bb:
+; X86-NEXT: [[OR:%.*]] = or i32 [[ARG]], 0
+; X86-NEXT: [[MUL:%.*]] = mul i32 0, 1
+; X86-NEXT: [[MUL1:%.*]] = mul i32 [[OR]], [[MUL]]
+; X86-NEXT: [[CMP:%.*]] = icmp ugt i32 0, [[MUL1]]
+; X86-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[OR]], i32 0
+; X86-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[MUL]], i32 1
+; X86-NEXT: ret <2 x i32> [[TMP1]]
+;
+; AARCH64-LABEL: define <2 x i32> @test(
+; AARCH64-SAME: i32 [[ARG:%.*]]) {
+; AARCH64-NEXT: bb:
+; AARCH64-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 1>, i32 [[ARG]], i32 0
+; AARCH64-NEXT: [[TMP1:%.*]] = mul <2 x i32> [[TMP0]], zeroinitializer
+; AARCH64-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
+; AARCH64-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
+; AARCH64-NEXT: [[MUL1:%.*]] = mul i32 [[TMP2]], [[TMP3]]
+; AARCH64-NEXT: [[CMP:%.*]] = icmp ugt i32 0, [[MUL1]]
+; AARCH64-NEXT: ret <2 x i32> [[TMP1]]
;
bb:
%or = or i32 %arg, 0
diff --git a/llvm/test/Transforms/SLPVectorizer/isOpcodeOrAlt.ll b/llvm/test/Transforms/SLPVectorizer/isOpcodeOrAlt.ll
new file mode 100644
index 0000000000000..c3b0de084b748
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/isOpcodeOrAlt.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=slp-vectorizer -S -slp-max-reg-size=1024 %s | FileCheck %s
+
+define void @test(ptr %a, ptr %b) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 0
+; CHECK-NEXT: [[GEP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[GEP0]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[TMP0]], <i32 1, i32 0, i32 1, i32 0>
+; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[GEP4]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %gep0 = getelementptr inbounds i32, ptr %a, i64 0
+ %gep1 = getelementptr inbounds i32, ptr %a, i64 1
+ %gep2 = getelementptr inbounds i32, ptr %a, i64 2
+ %gep3 = getelementptr inbounds i32, ptr %a, i64 3
+ %0 = load i32, ptr %gep0, align 4
+ %1 = load i32, ptr %gep1, align 4
+ %2 = load i32, ptr %gep2, align 4
+ %3 = load i32, ptr %gep3, align 4
+ %op0 = shl i32 %0, 1
+ %op1 = add i32 %1, zeroinitializer
+ %op2 = mul i32 %2, 2
+ %op3 = shl i32 %3, zeroinitializer
+ %gep4 = getelementptr inbounds i32, ptr %b, i64 0
+ %gep5 = getelementptr inbounds i32, ptr %b, i64 1
+ %gep6 = getelementptr inbounds i32, ptr %b, i64 2
+ %gep7 = getelementptr inbounds i32, ptr %b, i64 3
+ store i32 %op0, ptr %gep4, align 4
+ store i32 %op1, ptr %gep5, align 4
+ store i32 %op2, ptr %gep6, align 4
+ store i32 %op3, ptr %gep7, align 4
+ ret void
+}
diff --git a/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll b/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll
index 056b6222cae72..caca410f056c1 100644
--- a/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll
+++ b/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll
@@ -6,11 +6,9 @@ define void @func(i32 %0) {
; CHECK-SAME: i32 [[TMP0:%.*]]) {
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> <i32 0, i32 poison, i32 0, i32 0>, i32 [[TMP0]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: [[TMP6:%.*]] = shl i32 [[TMP0]], 0
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <32 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <32 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP6]] to i64
; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[TMP9]], 0
; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP9]] to i32
diff --git a/llvm/test/Transforms/SLPVectorizer/shuffle-mask-resized.ll b/llvm/test/Transforms/SLPVectorizer/shuffle-mask-resized.ll
index 732b50396a460..cf5927bf58327 100644
--- a/llvm/test/Transforms/SLPVectorizer/shuffle-mask-resized.ll
+++ b/llvm/test/Transforms/SLPVectorizer/shuffle-mask-resized.ll
@@ -12,9 +12,7 @@ define i32 @test() {
; CHECK-NEXT: br i1 false, label [[BB4:%.*]], label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1>
-; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> zeroinitializer, [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> zeroinitializer, [[TMP2]]
-; CHECK-NEXT: [[TMP5]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP5]] = add <2 x i32> zeroinitializer, [[TMP2]]
; CHECK-NEXT: br label [[BB1]]
; CHECK: bb4:
; CHECK-NEXT: [[TMP6:%.*]] = phi <8 x i32> [ [[TMP1]], [[BB1]] ]
>From aa560bd46de0a936579b7907b5e9801c22fd735c Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Mon, 24 Mar 2025 21:03:48 -0700
Subject: [PATCH 2/3] [SLP] Pre-commit test.
---
.../X86/BinOpSameOpcodeHelper.ll | 21 +++++++++++++++++++
1 file changed, 21 insertions(+)
create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/BinOpSameOpcodeHelper.ll
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/BinOpSameOpcodeHelper.ll b/llvm/test/Transforms/SLPVectorizer/X86/BinOpSameOpcodeHelper.ll
new file mode 100644
index 0000000000000..c90d7e7f332f3
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/BinOpSameOpcodeHelper.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -passes=slp-vectorizer -S %s | FileCheck %s
+
+define void @test() {
+entry:
+ %0 = mul i64 0, 0
+ %1 = lshr i64 %0, 0
+ %2 = sub i64 0, 1
+ %3 = lshr i64 %2, 0
+ %umin120 = call i64 @llvm.umin.i64(i64 %1, i64 %3)
+ %4 = sub i64 0, 0
+ %5 = lshr i64 %4, 0
+ %umin122 = call i64 @llvm.umin.i64(i64 %umin120, i64 %5)
+ %6 = add i64 0, 1
+ %7 = lshr i64 %6, 0
+ %umin123 = call i64 @llvm.umin.i64(i64 %umin122, i64 %7)
+ %umin124 = call i64 @llvm.umin.i64(i64 %umin123, i64 0)
+ ret void
+}
+
+declare i64 @llvm.umin.i64(i64, i64)
>From 58f09d66f8b22bf52994c8f3e125f99506c96fe8 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Mon, 24 Mar 2025 22:56:49 -0700
Subject: [PATCH 3/3] [SLP] Fix incorrect convert when interchangeable
instruction is used.
If the Converter has an AltOp, the instruction I should be converted to
AltOp rather than to MainOp (note: MainOp here is the AltOp inside
BinOpSameOpcodeHelper).
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 9 +++------
.../SLPVectorizer/X86/BinOpSameOpcodeHelper.ll | 15 +++++++++++++++
2 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 84ed1c11df9e0..00b7bb2956a92 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1095,12 +1095,9 @@ class BinOpSameOpcodeHelper {
unsigned getAltOpcode() const {
return hasAltOp() ? AltOp.getOpcode() : getMainOpcode();
}
- SmallVector<Value *> getMainOperand(Instruction *I) const {
+ SmallVector<Value *> getOperand(Instruction *I) const {
return MainOp.getOperand(I);
}
- SmallVector<Value *> getAltOperand(Instruction *I) const {
- return AltOp.getOperand(I);
- }
};
bool isConvertible(Instruction *I, Instruction *MainOp, Instruction *AltOp) {
@@ -1127,8 +1124,8 @@ convertTo(Instruction *I, Instruction *MainOp, Instruction *AltOp) {
assert(I->isBinaryOp() && "Cannot convert the instruction.");
BinOpSameOpcodeHelper Converter(I);
if (Converter.add(I) && Converter.add(MainOp) && !Converter.hasAltOp())
- return std::make_pair(MainOp, Converter.getMainOperand(MainOp));
- return std::make_pair(AltOp, Converter.getAltOperand(AltOp));
+ return std::make_pair(MainOp, Converter.getOperand(MainOp));
+ return std::make_pair(AltOp, Converter.getOperand(AltOp));
}
/// Main data required for vectorization of instructions.
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/BinOpSameOpcodeHelper.ll b/llvm/test/Transforms/SLPVectorizer/X86/BinOpSameOpcodeHelper.ll
index c90d7e7f332f3..6f27555aeb3f1 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/BinOpSameOpcodeHelper.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/BinOpSameOpcodeHelper.ll
@@ -2,6 +2,21 @@
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -passes=slp-vectorizer -S %s | FileCheck %s
define void @test() {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 0, 0
+; CHECK-NEXT: [[TMP1:%.*]] = sub i64 0, 1
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 0
+; CHECK-NEXT: [[UMIN120:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP0]], i64 [[TMP2]])
+; CHECK-NEXT: [[TMP3:%.*]] = sub i64 0, 0
+; CHECK-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP3]], 0
+; CHECK-NEXT: [[UMIN122:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN120]], i64 [[TMP4]])
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 0, 1
+; CHECK-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP5]], 0
+; CHECK-NEXT: [[UMIN123:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN122]], i64 [[TMP6]])
+; CHECK-NEXT: [[UMIN124:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN123]], i64 0)
+; CHECK-NEXT: ret void
+;
entry:
%0 = mul i64 0, 0
%1 = lshr i64 %0, 0
More information about the llvm-commits
mailing list