[llvm] a415d68 - Revert "[SLP]Initial support for copyable elements (non-schedulable only)"
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 22 07:39:38 PDT 2025
Author: Alexey Bataev
Date: 2025-07-22T07:39:32-07:00
New Revision: a415d68e48c4672c63082192626fec9bf9ae9c2c
URL: https://github.com/llvm/llvm-project/commit/a415d68e48c4672c63082192626fec9bf9ae9c2c
DIFF: https://github.com/llvm/llvm-project/commit/a415d68e48c4672c63082192626fec9bf9ae9c2c.diff
LOG: Revert "[SLP]Initial support for copyable elements (non-schedulable only)"
This reverts commit e202dba288edd47f1b370cc43aa8cd36a924e7c1 to try to
resolve compile time issues, reported in https://llvm-compile-time-tracker.com/compare.php?from=36089e5d983fe9ae00f497c2d500f37227f82db1&to=e202dba288edd47f1b370cc43aa8cd36a924e7c1&stat=instructions%3Au&details=on
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/X86/buildvector-schedule-for-subvector.ll
llvm/test/Transforms/SLPVectorizer/X86/full-match-with-poison-scalar.ll
llvm/test/Transforms/SLPVectorizer/X86/node-outside-used-only.ll
llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-instructions-become-schedulable.ll
llvm/test/Transforms/SLPVectorizer/X86/pr47642.ll
llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 5296364c92f0e..0d0b342505214 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -206,12 +206,6 @@ static cl::opt<bool> VectorizeNonPowerOf2(
"slp-vectorize-non-power-of-2", cl::init(false), cl::Hidden,
cl::desc("Try to vectorize with non-power-of-2 number of elements."));
-/// Enables vectorization of copyable elements.
-static cl::opt<bool> VectorizeCopyableElements(
- "slp-copyable-elements", cl::init(true), cl::Hidden,
- cl::desc("Try to replace values with the idempotent instructions for "
- "better vectorization."));
-
// Limit the number of alias checks. The limit is chosen so that
// it has no negative effect on the llvm benchmarks.
static const unsigned AliasedCheckLimit = 10;
@@ -861,13 +855,6 @@ static std::optional<unsigned> getExtractIndex(const Instruction *E) {
return *EI->idx_begin();
}
-namespace llvm {
-/// Checks if the specified value does not require scheduling. It does not
-/// require scheduling if all operands and all users do not need to be scheduled
-/// in the current basic block.
-static bool doesNotNeedToBeScheduled(Value *V);
-} // namespace llvm
-
namespace {
/// \returns true if \p Opcode is allowed as part of the main/alternate
/// instruction for SLP vectorization.
@@ -970,33 +957,6 @@ class BinOpSameOpcodeHelper {
return Instruction::Xor;
llvm_unreachable("Cannot find interchangeable instruction.");
}
-
- /// Return true if the instruction can be converted to \p Opcode.
- bool hasCandidateOpcode(unsigned Opcode) const {
- MaskType Candidate = Mask & SeenBefore;
- switch (Opcode) {
- case Instruction::Shl:
- return Candidate & ShlBIT;
- case Instruction::AShr:
- return Candidate & AShrBIT;
- case Instruction::Mul:
- return Candidate & MulBIT;
- case Instruction::Add:
- return Candidate & AddBIT;
- case Instruction::Sub:
- return Candidate & SubBIT;
- case Instruction::And:
- return Candidate & AndBIT;
- case Instruction::Or:
- return Candidate & OrBIT;
- case Instruction::Xor:
- return Candidate & XorBIT;
- default:
- break;
- }
- llvm_unreachable("Cannot find interchangeable instruction.");
- }
-
SmallVector<Value *> getOperand(const Instruction *To) const {
unsigned ToOpcode = To->getOpcode();
unsigned FromOpcode = I->getOpcode();
@@ -1157,10 +1117,6 @@ class BinOpSameOpcodeHelper {
AltOp.trySet(OpcodeInMaskForm, InterchangeableMask));
}
unsigned getMainOpcode() const { return MainOp.getOpcode(); }
- /// Checks if the list of potential opcodes includes \p Opcode.
- bool hasCandidateOpcode(unsigned Opcode) const {
- return MainOp.hasCandidateOpcode(Opcode);
- }
bool hasAltOp() const { return AltOp.I; }
unsigned getAltOpcode() const {
return hasAltOp() ? AltOp.getOpcode() : getMainOpcode();
@@ -1196,8 +1152,6 @@ class InstructionsState {
/// GetVectorCost.
Instruction *MainOp = nullptr;
Instruction *AltOp = nullptr;
- /// Wether the instruction state represents copyable instructions.
- bool HasCopyables = false;
public:
Instruction *getMainOp() const {
@@ -1236,11 +1190,9 @@ class InstructionsState {
if (!I->isBinaryOp())
return nullptr;
BinOpSameOpcodeHelper Converter(MainOp);
- if (!Converter.add(I) || !Converter.add(MainOp))
- return nullptr;
- if (Converter.hasAltOp() && !isAltShuffle())
- return nullptr;
- return Converter.hasAltOp() ? AltOp : MainOp;
+ if (Converter.add(I) && Converter.add(MainOp) && !Converter.hasAltOp())
+ return MainOp;
+ return AltOp;
}
/// Checks if main/alt instructions are shift operations.
@@ -1285,63 +1237,9 @@ class InstructionsState {
explicit operator bool() const { return valid(); }
InstructionsState() = delete;
- InstructionsState(Instruction *MainOp, Instruction *AltOp,
- bool HasCopyables = false)
- : MainOp(MainOp), AltOp(AltOp), HasCopyables(HasCopyables) {}
+ InstructionsState(Instruction *MainOp, Instruction *AltOp)
+ : MainOp(MainOp), AltOp(AltOp) {}
static InstructionsState invalid() { return {nullptr, nullptr}; }
-
- bool isCopyableElement(Value *V) const {
- assert(valid() && "InstructionsState is invalid.");
- if (!HasCopyables)
- return false;
- if (isAltShuffle() || getOpcode() == Instruction::GetElementPtr)
- return false;
- auto *I = dyn_cast<Instruction>(V);
- if (!I)
- return !isa<PoisonValue>(V);
- if (I->getParent() != MainOp->getParent() &&
- (!isVectorLikeInstWithConstOps(I) ||
- !isVectorLikeInstWithConstOps(MainOp)))
- return true;
- if (I->getOpcode() == MainOp->getOpcode())
- return false;
- if (!I->isBinaryOp())
- return true;
- BinOpSameOpcodeHelper Converter(MainOp);
- return !Converter.add(I) || !Converter.add(MainOp) ||
- Converter.hasAltOp() || !Converter.hasCandidateOpcode(getOpcode());
- }
-
- /// Checks if the value is non-schedulable.
- bool isNonSchedulable(Value *V) const {
- assert(valid() && "InstructionsState is invalid.");
- auto *I = dyn_cast<Instruction>(V);
- if (!HasCopyables)
- return !I || isa<PHINode>(I) || isVectorLikeInstWithConstOps(I) ||
- doesNotNeedToBeScheduled(V);
- // MainOp for copyables always schedulable to correctly identify
- // non-schedulable copyables.
- if (isCopyableElement(V)) {
- auto IsNonSchedulableCopyableElement = [this](Value *V) {
- auto *I = dyn_cast<Instruction>(V);
- return !I || isa<PHINode>(I) || I->getParent() != MainOp->getParent() ||
- (doesNotNeedToBeScheduled(I) &&
- // If the copyable instructions comes after MainOp
- // (non-schedulable, but used in the block) - cannot vectorize
- // it, will possibly generate use before def.
- (isVectorLikeInstWithConstOps(I) || !MainOp->comesBefore(I)));
- };
-
- return IsNonSchedulableCopyableElement(V);
- }
- return !I || isa<PHINode>(I) || isVectorLikeInstWithConstOps(I) ||
- doesNotNeedToBeScheduled(V);
- }
-
- bool areInstructionsWithCopyableElements() const {
- assert(valid() && "InstructionsState is invalid.");
- return HasCopyables;
- }
};
std::pair<Instruction *, SmallVector<Value *>>
@@ -3001,6 +2899,9 @@ class BoUpSLP {
for (OperandDataVec &Ops : OpsVec)
Ops.resize(NumLanes);
for (unsigned Lane : seq<unsigned>(NumLanes)) {
+ Value *V = VL[Lane];
+ assert((isa<Instruction>(V) || isa<PoisonValue>(V)) &&
+ "Expected instruction or poison value");
// Our tree has just 3 nodes: the root and two operands.
// It is therefore trivial to get the APO. We only need to check the
// opcode of V and whether the operand at OpIdx is the LHS or RHS
@@ -3011,24 +2912,17 @@ class BoUpSLP {
// Since operand reordering is performed on groups of commutative
// operations or alternating sequences (e.g., +, -), we can safely tell
// the inverse operations by checking commutativity.
- auto *I = dyn_cast<Instruction>(VL[Lane]);
- if (!I && isa<PoisonValue>(VL[Lane])) {
+ if (isa<PoisonValue>(V)) {
for (unsigned OpIdx : seq<unsigned>(NumOperands))
OpsVec[OpIdx][Lane] = {Operands[OpIdx][Lane], true, false};
continue;
}
- bool IsInverseOperation = false;
- if (S.isCopyableElement(VL[Lane])) {
- // The value is a copyable element.
- IsInverseOperation = !isCommutative(MainOp);
- } else {
- assert(I && "Expected instruction");
- auto [SelectedOp, Ops] = convertTo(I, S);
- // We cannot check commutativity by the converted instruction
- // (SelectedOp) because isCommutative also examines def-use
- // relationships.
- IsInverseOperation = !isCommutative(SelectedOp, I);
- }
+ auto [SelectedOp, Ops] = convertTo(cast<Instruction>(V), S);
+ // We cannot check commutativity by the converted instruction
+ // (SelectedOp) because isCommutative also examines def-use
+ // relationships.
+ bool IsInverseOperation =
+ !isCommutative(SelectedOp, cast<Instruction>(V));
for (unsigned OpIdx : seq<unsigned>(ArgSize)) {
bool APO = (OpIdx == 0) ? false : IsInverseOperation;
OpsVec[OpIdx][Lane] = {Operands[OpIdx][Lane], APO, false};
@@ -3898,9 +3792,6 @@ class BoUpSLP {
/// reordering of operands during buildTreeRec() and vectorizeTree().
SmallVector<ValueList, 2> Operands;
- /// Copyable elements of the entry node.
- SmallPtrSet<const Value *, 4> CopyableElements;
-
/// MainOp and AltOp are recorded inside. S should be obtained from
/// newTreeEntry.
InstructionsState S = InstructionsState::invalid();
@@ -3929,7 +3820,11 @@ class BoUpSLP {
void setInterleave(unsigned Factor) { InterleaveFactor = Factor; }
/// Marks the node as one that does not require scheduling.
- void setDoesNotNeedToSchedule() { DoesNotNeedToSchedule = true; }
+ void setDoesNotNeedToSchedule() {
+ assert(::doesNotNeedToSchedule(Scalars) &&
+ "Expected to not need scheduling");
+ DoesNotNeedToSchedule = true;
+ }
/// Returns true if the node is marked as one that does not require
/// scheduling.
bool doesNotNeedToSchedule() const { return DoesNotNeedToSchedule; }
@@ -4001,20 +3896,6 @@ class BoUpSLP {
bool hasState() const { return S.valid(); }
- /// Add \p V to the list of copyable elements.
- void addCopyableElement(Value *V) {
- assert(S.isCopyableElement(V) && "Not a copyable element.");
- CopyableElements.insert(V);
- }
-
- /// Returns true if \p V is a copyable element.
- bool isCopyableElement(Value *V) const {
- return CopyableElements.contains(V);
- }
-
- /// Returns true if any scalar in the list is a copyable element.
- bool hasCopyableElements() const { return !CopyableElements.empty(); }
-
/// When ReuseReorderShuffleIndices is empty it just returns position of \p
/// V within vector of Scalars. Otherwise, try to remap on its reuse index.
unsigned findLaneForValue(Value *V) const {
@@ -4087,8 +3968,6 @@ class BoUpSLP {
for (Value *V : Scalars)
dbgs().indent(2) << *V << "\n";
dbgs() << "State: ";
- if (S && hasCopyableElements())
- dbgs() << "[[Copyable]] ";
switch (State) {
case Vectorize:
if (InterleaveFactor > 0) {
@@ -4266,20 +4145,12 @@ class BoUpSLP {
}
}
} else if (!Last->isGather()) {
- if (isa<PHINode>(S.getMainOp()) ||
- isVectorLikeInstWithConstOps(S.getMainOp()) ||
- (!S.areInstructionsWithCopyableElements() &&
- doesNotNeedToSchedule(VL)) ||
- all_of(VL, [&](Value *V) { return S.isNonSchedulable(V); }))
+ if (doesNotNeedToSchedule(VL))
Last->setDoesNotNeedToSchedule();
SmallPtrSet<Value *, 4> Processed;
for (Value *V : VL) {
if (isa<PoisonValue>(V))
continue;
- if (S.isCopyableElement(V)) {
- Last->addCopyableElement(V);
- continue;
- }
auto It = ScalarToTreeEntries.find(V);
if (It == ScalarToTreeEntries.end()) {
ScalarToTreeEntries.try_emplace(V).first->getSecond().push_back(Last);
@@ -4291,14 +4162,16 @@ class BoUpSLP {
}
}
// Update the scheduler bundle to point to this TreeEntry.
- assert((!Bundle.getBundle().empty() || Last->doesNotNeedToSchedule()) &&
+ assert((!Bundle.getBundle().empty() || isa<PHINode>(S.getMainOp()) ||
+ isVectorLikeInstWithConstOps(S.getMainOp()) ||
+ Last->doesNotNeedToSchedule()) &&
"Bundle and VL out of sync");
if (!Bundle.getBundle().empty()) {
#if !defined(NDEBUG) || defined(EXPENSIVE_CHECKS)
auto *BundleMember = Bundle.getBundle().begin();
SmallPtrSet<Value *, 4> Processed;
for (Value *V : VL) {
- if (S.isNonSchedulable(V) || !Processed.insert(V).second)
+ if (doesNotNeedToBeScheduled(V) || !Processed.insert(V).second)
continue;
++BundleMember;
}
@@ -4407,8 +4280,7 @@ class BoUpSLP {
/// in general.
ScalarsVectorizationLegality
getScalarsVectorizationLegality(ArrayRef<Value *> VL, unsigned Depth,
- const EdgeInfo &UserTreeIdx,
- bool TryCopyableElementsVectorization) const;
+ const EdgeInfo &UserTreeIdx) const;
/// Checks if the specified list of the instructions/values can be vectorized
/// and fills required data before actual scheduling of the instructions.
@@ -5124,8 +4996,7 @@ class BoUpSLP {
/// Build a bundle from the ScheduleData nodes corresponding to the
/// scalar instruction for each lane.
- ScheduleBundle &buildBundle(ArrayRef<Value *> VL,
- const InstructionsState &S);
+ ScheduleBundle &buildBundle(ArrayRef<Value *> VL);
/// Checks if a bundle of instructions can be scheduled, i.e. has no
/// cyclic dependencies. This is only a dry-run, no instructions are
@@ -8011,7 +7882,7 @@ void BoUpSLP::buildExternalUses(
// For each lane:
for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) {
Value *Scalar = Entry->Scalars[Lane];
- if (!isa<Instruction>(Scalar) || Entry->isCopyableElement(Scalar))
+ if (!isa<Instruction>(Scalar))
continue;
// All uses must be replaced already? No need to do it again.
auto It = ScalarToExtUses.find(Scalar);
@@ -9741,8 +9612,7 @@ static bool tryToFindDuplicates(SmallVectorImpl<Value *> &VL,
PoisonValue::get(UniqueValues.front()->getType()));
// Check that extended with poisons operations are still valid for
// vectorization (div/rem are not allowed).
- if (!S.areInstructionsWithCopyableElements() &&
- !getSameOpcode(PaddedUniqueValues, TLI).valid()) {
+ if (!getSameOpcode(PaddedUniqueValues, TLI).valid()) {
LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
ReuseShuffleIndices.clear();
return false;
@@ -9891,95 +9761,13 @@ bool BoUpSLP::canBuildSplitNode(ArrayRef<Value *> VL,
}
namespace {
-/// Class accepts incoming list of values, checks if it is able to model
-/// "copyable" values as compatible operations, and generates the list of values
-/// for scheduling and list of operands doe the new nodes.
+/// Class accepts incoming list of values and generates the list of values
+/// for scheduling and list of operands for the new nodes.
class InstructionsCompatibilityAnalysis {
DominatorTree &DT;
const DataLayout &DL;
const TargetTransformInfo &TTI;
const TargetLibraryInfo &TLI;
- unsigned MainOpcode = 0;
- Instruction *MainOp = nullptr;
-
- /// Identifies the best candidate value, which represents main opcode
- /// operation.
- /// Currently the best candidate is the Add instruction with the parent
- /// block with the highest DFS incoming number (block, that dominates other).
- void findAndSetMainInstruction(ArrayRef<Value *> VL) {
- BasicBlock *Parent = nullptr;
- // Checks if the instruction has supported opcode.
- auto IsSupportedOpcode = [](Instruction *I) {
- return I && I->getOpcode() == Instruction::Add;
- };
- SmallDenseSet<Value *, 8> Operands;
- for (Value *V : VL) {
- auto *I = dyn_cast<Instruction>(V);
- if (!I)
- continue;
- if (!DT.isReachableFromEntry(I->getParent()))
- continue;
- if (!MainOp) {
- MainOp = I;
- Parent = I->getParent();
- Operands.insert(I->op_begin(), I->op_end());
- continue;
- }
- if (Parent == I->getParent()) {
- if (!IsSupportedOpcode(MainOp))
- MainOp = I;
- if (MainOp->getOpcode() == I->getOpcode() &&
- doesNotNeedToBeScheduled(MainOp) && !doesNotNeedToBeScheduled(I))
- MainOp = I;
- Operands.insert(I->op_begin(), I->op_end());
- continue;
- }
- auto *NodeA = DT.getNode(Parent);
- auto *NodeB = DT.getNode(I->getParent());
- assert(NodeA && "Should only process reachable instructions");
- assert(NodeB && "Should only process reachable instructions");
- assert((NodeA == NodeB) ==
- (NodeA->getDFSNumIn() == NodeB->getDFSNumIn()) &&
- "Different nodes should have different DFS numbers");
- if (NodeA->getDFSNumIn() < NodeB->getDFSNumIn()) {
- MainOp = I;
- Parent = I->getParent();
- Operands.clear();
- Operands.insert(I->op_begin(), I->op_end());
- }
- }
- if (!IsSupportedOpcode(MainOp) || Operands.contains(MainOp)) {
- MainOp = nullptr;
- return;
- }
- MainOpcode = MainOp->getOpcode();
- }
-
- /// Returns the idempotent value for the \p MainOp with the detected \p
- /// MainOpcode. For Add, returns 0. For Or, it should choose between false and
- /// the operand itself, since V or V == V.
- Value *selectBestIdempotentValue() const {
- assert(MainOpcode == Instruction::Add && "Unsupported opcode");
- return ConstantExpr::getBinOpIdentity(MainOpcode, MainOp->getType(),
- !MainOp->isCommutative());
- }
-
- /// Returns the value and operands for the \p V, considering if it is original
- /// instruction and its actual operands should be returned, or it is a
- /// copyable element and its should be represented as idempotent instruction.
- SmallVector<Value *> getOperands(const InstructionsState &S, Value *V) const {
- if (isa<PoisonValue>(V))
- return {V, V};
- if (!S.isCopyableElement(V))
- return convertTo(cast<Instruction>(V), S).second;
- switch (MainOpcode) {
- case Instruction::Add:
- return {V, selectBestIdempotentValue()};
- default:
- break;
- }
- llvm_unreachable("Unsupported opcode");
- }
/// Builds operands for the original instructions.
void
@@ -10140,151 +9928,22 @@ class InstructionsCompatibilityAnalysis {
const TargetLibraryInfo &TLI)
: DT(DT), DL(DL), TTI(TTI), TLI(TLI) {}
- InstructionsState
- buildInstructionsState(ArrayRef<Value *> VL, const BoUpSLP &R,
- bool TryCopyableElementsVectorization,
- bool WithProfitabilityCheck = false) {
- InstructionsState S = getSameOpcode(VL, TLI);
- if (S)
- return S;
- if (!VectorizeCopyableElements || !TryCopyableElementsVectorization)
- return S;
- findAndSetMainInstruction(VL);
- if (!MainOp)
- return InstructionsState::invalid();
- S = InstructionsState(MainOp, MainOp, /*HasCopyables=*/true);
- // TODO: Remove this check once support for schulable copyables is landed.
- if (any_of(VL, [&](Value *V) {
- return S.isCopyableElement(V) && !S.isNonSchedulable(V);
- }))
- return InstructionsState::invalid();
-
- if (!WithProfitabilityCheck)
- return S;
- // Check if it is profitable to vectorize the instruction.
- SmallVector<BoUpSLP::ValueList> Operands = buildOperands(S, VL);
- if (VL.size() == 2) {
- // Check if the operands allow better vectorization.
- SmallVector<std::pair<Value *, Value *>, 4> Candidates;
- Candidates.emplace_back(Operands[0][0], Operands[0][1]);
- Candidates.emplace_back(Operands[1][0], Operands[1][1]);
- if (isCommutative(MainOp)) {
- Candidates.emplace_back(Operands[0][0], Operands[1][1]);
- Candidates.emplace_back(Operands[1][0], Operands[0][1]);
- }
- // No good candidates - not profitable.
- if (!R.findBestRootPair(Candidates,
- BoUpSLP::LookAheadHeuristics::ScoreSplat)) {
- // Deeper analysis for 2 splats/constants.
- SmallVector<std::pair<Value *, Value *>, 4> Candidates1, Candidates2;
- Candidates1.emplace_back(Operands[0][0], Operands[0][1]);
- Candidates2.emplace_back(Operands[1][0], Operands[1][1]);
- bool Res =
- R.findBestRootPair(Candidates1) && R.findBestRootPair(Candidates2);
- if (!Res && isCommutative(MainOp)) {
- Candidates1.clear();
- Candidates2.clear();
- Candidates1.emplace_back(Operands[0][0], Operands[1][1]);
- Candidates2.emplace_back(Operands[1][0], Operands[0][1]);
- Res = R.findBestRootPair(Candidates1) &&
- R.findBestRootPair(Candidates2);
- }
- if (!Res)
- return InstructionsState::invalid();
- }
- return S;
- }
- assert(Operands.size() == 2 && "Unexpected number of operands!");
- unsigned CopyableNum =
- count_if(VL, [&](Value *V) { return S.isCopyableElement(V); });
- if (CopyableNum < VL.size() / 2)
- return S;
- // Check profitability if number of copyables > VL.size() / 2.
- // 1. Reorder operands for better matching.
- if (isCommutative(MainOp)) {
- for (auto &Ops : Operands) {
- // Make instructions the first operands.
- if (!isa<Instruction>(Ops.front()) && isa<Instruction>(Ops.back())) {
- std::swap(Ops.front(), Ops.back());
- continue;
- }
- // Make constants the second operands.
- if (isa<Constant>(Ops.front())) {
- std::swap(Ops.front(), Ops.back());
- continue;
- }
- }
- }
- // 2. Check, if operands can be vectorized.
- if (count_if(Operands.back(), IsaPred<Instruction>) > 1)
- return InstructionsState::invalid();
- auto CheckOperand = [&](ArrayRef<Value *> Ops) {
- if (allConstant(Ops) || isSplat(Ops))
- return true;
- // Check if it is "almost" splat, i.e. has >= 4 elements and only single
- // one is different.
- constexpr unsigned Limit = 4;
- if (Operands.front().size() >= Limit) {
- SmallDenseMap<const Value *, unsigned> Counters;
- for (Value *V : Ops) {
- if (isa<UndefValue>(V))
- continue;
- ++Counters[V];
- }
- if (Counters.size() == 2 &&
- any_of(Counters, [&](const std::pair<const Value *, unsigned> &C) {
- return C.second == 1;
- }))
- return true;
- }
- // First operand not a constant or splat? Last attempt - check for
- // potential vectorization.
- InstructionsCompatibilityAnalysis Analysis(DT, DL, TTI, TLI);
- InstructionsState OpS = Analysis.buildInstructionsState(
- Ops, R, /*TryCopyableElementsVectorization=*/true);
- if (!OpS)
- return false;
- unsigned CopyableNum =
- count_if(Ops, [&](Value *V) { return OpS.isCopyableElement(V); });
- return CopyableNum <= VL.size() / 2;
- };
- if (!CheckOperand(Operands.front()))
- return InstructionsState::invalid();
-
- return S;
- }
-
SmallVector<BoUpSLP::ValueList> buildOperands(const InstructionsState &S,
ArrayRef<Value *> VL) {
assert(S && "Invalid state!");
SmallVector<BoUpSLP::ValueList> Operands;
- if (S.areInstructionsWithCopyableElements()) {
- MainOp = S.getMainOp();
- MainOpcode = S.getOpcode();
- Operands.assign(MainOp->getNumOperands(),
- BoUpSLP::ValueList(VL.size(), nullptr));
- for (auto [Idx, V] : enumerate(VL)) {
- SmallVector<Value *> OperandsForValue = getOperands(S, V);
- for (auto [OperandIdx, Operand] : enumerate(OperandsForValue))
- Operands[OperandIdx][Idx] = Operand;
- }
- } else {
- buildOriginalOperands(S, VL, Operands);
- }
+ buildOriginalOperands(S, VL, Operands);
return Operands;
}
};
} // namespace
-BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
- ArrayRef<Value *> VL, unsigned Depth, const EdgeInfo &UserTreeIdx,
- bool TryCopyableElementsVectorization) const {
+BoUpSLP::ScalarsVectorizationLegality
+BoUpSLP::getScalarsVectorizationLegality(ArrayRef<Value *> VL, unsigned Depth,
+ const EdgeInfo &UserTreeIdx) const {
assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
- InstructionsCompatibilityAnalysis Analysis(*DT, *DL, *TTI, *TLI);
- InstructionsState S = Analysis.buildInstructionsState(
- VL, *this, TryCopyableElementsVectorization,
- /*WithProfitabilityCheck=*/true);
+ InstructionsState S = getSameOpcode(VL, *TLI);
// Don't go into catchswitch blocks, which can happen with PHIs.
// Such blocks can only have PHIs and the catchswitch. There is no
@@ -10583,9 +10242,9 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
return true;
};
- ScalarsVectorizationLegality Legality = getScalarsVectorizationLegality(
- VL, Depth, UserTreeIdx, /*TryCopyableElementsVectorization=*/false);
- InstructionsState S = Legality.getInstructionsState();
+ ScalarsVectorizationLegality Legality =
+ getScalarsVectorizationLegality(VL, Depth, UserTreeIdx);
+ const InstructionsState &S = Legality.getInstructionsState();
if (!Legality.isLegal()) {
if (Legality.trySplitVectorize()) {
auto [MainOp, AltOp] = getMainAltOpsNoStateVL(VL);
@@ -10593,18 +10252,11 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
if (MainOp && AltOp && TrySplitNode(InstructionsState(MainOp, AltOp)))
return;
}
- if (!S)
- Legality = getScalarsVectorizationLegality(
- VL, Depth, UserTreeIdx, /*TryCopyableElementsVectorization=*/true);
- if (!Legality.isLegal()) {
- if (Legality.tryToFindDuplicates())
- tryToFindDuplicates(VL, ReuseShuffleIndices, *TTI, *TLI, S,
- UserTreeIdx);
+ if (Legality.tryToFindDuplicates())
+ tryToFindDuplicates(VL, ReuseShuffleIndices, *TTI, *TLI, S, UserTreeIdx);
- newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices);
- return;
- }
- S = Legality.getInstructionsState();
+ newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices);
+ return;
}
// FIXME: investigate if there are profitable cases for VL.size() <= 4.
@@ -13372,8 +13024,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
assert(E->getOpcode() &&
((allSameType(VL) && allSameBlock(VL)) ||
(E->getOpcode() == Instruction::GetElementPtr &&
- E->getMainOp()->getType()->isPointerTy()) ||
- E->hasCopyableElements()) &&
+ E->getMainOp()->getType()->isPointerTy())) &&
"Invalid VL");
Instruction *VL0 = E->getMainOp();
unsigned ShuffleOrOp =
@@ -13385,7 +13036,6 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
SmallBitVector UsedScalars(Sz, false);
for (unsigned I = 0; I < Sz; ++I) {
if (isa<Instruction>(UniqueValues[I]) &&
- !E->isCopyableElement(UniqueValues[I]) &&
getTreeEntries(UniqueValues[I]).front() == E)
continue;
UsedScalars.set(I);
@@ -16425,8 +16075,6 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
auto *I = dyn_cast<Instruction>(V);
if (!I)
continue;
- if (E->isCopyableElement(I))
- continue;
if (FirstInst->getParent() == I->getParent()) {
if (I->comesBefore(FirstInst))
FirstInst = I;
@@ -16491,8 +16139,7 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
return nullptr;
for (Value *V : E->Scalars) {
auto *I = dyn_cast<Instruction>(V);
- if (!I || isa<PHINode>(I) ||
- (!E->isCopyableElement(I) && doesNotNeedToBeScheduled(I)))
+ if (!I || isa<PHINode>(I) || doesNotNeedToBeScheduled(I))
continue;
ArrayRef<ScheduleBundle *> Bundles = It->second->getScheduleBundles(I);
if (Bundles.empty())
@@ -16511,8 +16158,8 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
[](Value *V) {
return !isa<GetElementPtrInst>(V) && isa<Instruction>(V);
})) ||
- all_of(E->Scalars, [&](Value *V) {
- return isa<PoisonValue>(V) || E->isCopyableElement(V) ||
+ all_of(E->Scalars, [](Value *V) {
+ return isa<PoisonValue>(V) ||
(!isVectorLikeInstWithConstOps(V) && isUsedOutsideBlock(V));
}))
Res = FindLastInst();
@@ -18993,7 +18640,6 @@ Value *BoUpSLP::vectorizeTree(
TE->UserTreeIndex.UserTE->State == TreeEntry::Vectorize &&
(TE->UserTreeIndex.UserTE->getOpcode() != Instruction::PHI ||
TE->UserTreeIndex.UserTE->isAltShuffle()) &&
- !TE->UserTreeIndex.UserTE->hasCopyableElements() &&
all_of(TE->UserTreeIndex.UserTE->Scalars,
[](Value *V) { return isUsedOutsideBlock(V); })) {
Instruction &LastInst =
@@ -19536,7 +19182,7 @@ Value *BoUpSLP::vectorizeTree(
if (auto *EE = dyn_cast<ExtractElementInst>(Scalar);
EE && IgnoredExtracts.contains(EE))
continue;
- if (!isa<Instruction>(Scalar) || Entry->isCopyableElement(Scalar))
+ if (isa<PoisonValue>(Scalar))
continue;
#ifndef NDEBUG
Type *Ty = Scalar->getType();
@@ -19778,15 +19424,12 @@ void BoUpSLP::optimizeGatherSequence() {
}
BoUpSLP::ScheduleBundle &
-BoUpSLP::BlockScheduling::buildBundle(ArrayRef<Value *> VL,
- const InstructionsState &S) {
+BoUpSLP::BlockScheduling::buildBundle(ArrayRef<Value *> VL) {
auto &BundlePtr =
ScheduledBundlesList.emplace_back(std::make_unique<ScheduleBundle>());
for (Value *V : VL) {
if (doesNotNeedToBeScheduled(V))
continue;
- if (S.isCopyableElement(V))
- continue;
ScheduleData *BundleMember = getScheduleData(V);
assert(BundleMember && "no ScheduleData for bundle member "
"(maybe not in same basic block)");
@@ -19807,19 +19450,10 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
const InstructionsState &S) {
// No need to schedule PHIs, insertelement, extractelement and extractvalue
// instructions.
- bool HasCopyables = S.areInstructionsWithCopyableElements();
if (isa<PHINode>(S.getMainOp()) ||
- isVectorLikeInstWithConstOps(S.getMainOp()) ||
- (!HasCopyables && doesNotNeedToSchedule(VL)) ||
- all_of(VL, [&](Value *V) { return S.isNonSchedulable(V); }))
+ isVectorLikeInstWithConstOps(S.getMainOp()) || doesNotNeedToSchedule(VL))
return nullptr;
- // TODO Remove once full support for copyables is landed.
- assert(all_of(VL,
- [&](Value *V) {
- return !S.isCopyableElement(V) || S.isNonSchedulable(V);
- }) &&
- "Copyable elements should not be schedulable");
// Initialize the instruction bundle.
Instruction *OldScheduleEnd = ScheduleEnd;
LLVM_DEBUG(dbgs() << "SLP: bundle: " << *S.getMainOp() << "\n");
@@ -19865,7 +19499,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
// Make sure that the scheduling region contains all
// instructions of the bundle.
for (Value *V : VL) {
- if (doesNotNeedToBeScheduled(V) || S.isCopyableElement(V))
+ if (doesNotNeedToBeScheduled(V))
continue;
if (!extendSchedulingRegion(V, S)) {
// If the scheduling region got new instructions at the lower end (or it
@@ -19882,7 +19516,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
bool ReSchedule = false;
for (Value *V : VL) {
- if (doesNotNeedToBeScheduled(V) || S.isCopyableElement(V))
+ if (doesNotNeedToBeScheduled(V))
continue;
ScheduleData *BundleMember = getScheduleData(V);
assert(BundleMember &&
@@ -19907,7 +19541,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
ReSchedule = true;
}
- ScheduleBundle &Bundle = buildBundle(VL, S);
+ ScheduleBundle &Bundle = buildBundle(VL);
TryScheduleBundleImpl(ReSchedule, Bundle);
if (!Bundle.isReady()) {
for (ScheduleData *BD : Bundle.getBundle()) {
@@ -19924,7 +19558,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
}
ScheduledBundlesList.pop_back();
for (Value *V : VL) {
- if (doesNotNeedToBeScheduled(V) || S.isCopyableElement(V))
+ if (doesNotNeedToBeScheduled(V))
continue;
ScheduledBundles.find(cast<Instruction>(V))->getSecond().pop_back();
}
@@ -20553,7 +20187,7 @@ bool BoUpSLP::collectValuesToDemote(
};
if (E.isGather() || !Visited.insert(&E).second ||
any_of(E.Scalars, [&](Value *V) {
- return !isa<Constant>(V) && all_of(V->users(), [&](User *U) {
+ return !isa<PoisonValue>(V) && all_of(V->users(), [&](User *U) {
return isa<InsertElementInst>(U) && !isVectorized(U);
});
}))
@@ -21019,12 +20653,7 @@ void BoUpSLP::computeMinimumValueSizes() {
if (!IsKnownPositive)
++BitWidth1;
- auto *I = dyn_cast<Instruction>(Root);
- if (!I) {
- MaxBitWidth = std::max(BitWidth1, MaxBitWidth);
- continue;
- }
- APInt Mask = DB->getDemandedBits(I);
+ APInt Mask = DB->getDemandedBits(cast<Instruction>(Root));
unsigned BitWidth2 = Mask.getBitWidth() - Mask.countl_zero();
MaxBitWidth =
std::max<unsigned>(std::min(BitWidth1, BitWidth2), MaxBitWidth);
@@ -21353,9 +20982,7 @@ SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
for (Value *V : Chain)
ValOps.insert(cast<StoreInst>(V)->getValueOperand());
// Operands are not same/alt opcodes or non-power-of-2 uniques - exit.
- InstructionsCompatibilityAnalysis Analysis(*DT, *DL, *TTI, *TLI);
- InstructionsState S = Analysis.buildInstructionsState(
- ValOps.getArrayRef(), R, /*TryCopyableElementsVectorization=*/true);
+ InstructionsState S = getSameOpcode(ValOps.getArrayRef(), *TLI);
if (all_of(ValOps, IsaPred<Instruction>) && ValOps.size() > 1) {
DenseSet<Value *> Stores(Chain.begin(), Chain.end());
bool IsAllowedSize =
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-schedule-for-subvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-schedule-for-subvector.ll
index 7408ba10cc772..07fdc9d8dd2fa 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-schedule-for-subvector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-schedule-for-subvector.ll
@@ -4,6 +4,9 @@
define void @test() {
; CHECK-LABEL: define void @test() {
; CHECK-NEXT: [[BB:.*:]]
+; CHECK-NEXT: [[ADD:%.*]] = add i32 1, 0
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 poison>, i32 [[ADD]], i32 3
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i32> [[TMP0]], zeroinitializer
; CHECK-NEXT: [[ICMP:%.*]] = icmp samesign ult i32 0, 0
; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[ICMP]], i32 0, i32 0
; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[SELECT]] to i64
@@ -14,7 +17,8 @@ define void @test() {
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 poison>, i32 [[CALL]], i32 3
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 undef, i1 undef, i1 undef, i1 undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT: ret void
;
bb:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/full-match-with-poison-scalar.ll b/llvm/test/Transforms/SLPVectorizer/X86/full-match-with-poison-scalar.ll
index 5e3d4715e99c5..15ba98f90f0b8 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/full-match-with-poison-scalar.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/full-match-with-poison-scalar.ll
@@ -7,10 +7,17 @@ define i32 @test() {
; CHECK-NEXT: br label %[[FUNC_135_EXIT_I:.*]]
; CHECK: [[FUNC_135_EXIT_I]]:
; CHECK-NEXT: [[G_228_PROMOTED166_I1105_I:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 poison, i32 poison, i32 poison>, i32 [[G_228_PROMOTED166_I1105_I]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 poison>, [[TMP1]]
-; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 4>
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 poison, i32 poison>, i32 [[G_228_PROMOTED166_I1105_I]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> zeroinitializer, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3>
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> poison, i32 [[G_228_PROMOTED166_I1105_I]], i32 0
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[TMP7]], <16 x i32> [[TMP9]], <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 23, i32 8, i32 9, i32 10, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <12 x i32> [[TMP3]], <12 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i32> [[TMP17]], <16 x i32> [[TMP8]], <16 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 23, i32 24, i32 25, i32 26, i32 2, i32 2, i32 2, i32 2, i32 3>
; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt <16 x i32> [[TMP11]], zeroinitializer
; CHECK-NEXT: [[TMP13:%.*]] = icmp ult <16 x i32> [[TMP11]], zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i1> [[TMP12]], <16 x i1> [[TMP13]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/node-outside-used-only.ll b/llvm/test/Transforms/SLPVectorizer/X86/node-outside-used-only.ll
index 03d76ef571d64..1c482e079bb0f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/node-outside-used-only.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/node-outside-used-only.ll
@@ -4,10 +4,11 @@
define i64 @test() {
; CHECK-LABEL: define i64 @test() {
; CHECK-NEXT: [[BB:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 0, i32 1
; CHECK-NEXT: br label %[[BB1:.*]]
; CHECK: [[BB1]]:
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP4:%.*]], %[[BB5:.*]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> zeroinitializer, [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP4]] = or <2 x i32> [[TMP3]], zeroinitializer
; CHECK-NEXT: br label %[[BB5]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-instructions-become-schedulable.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-instructions-become-schedulable.ll
index 6bb52e0fc43b3..652abef14771d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-instructions-become-schedulable.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-instructions-become-schedulable.ll
@@ -7,17 +7,19 @@ define void @test() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[BB1:.*]]
; CHECK: [[IF_THEN_I_I:.*]]:
-; CHECK-NEXT: br label %[[BB3:.*]]
+; CHECK-NEXT: br label %[[BB5:.*]]
; CHECK: [[BB1]]:
; CHECK-NEXT: [[TMP0:%.*]] = zext i1 false to i64
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> <i64 0, i64 0, i64 poison, i64 0>, i64 [[TMP0]], i32 2
-; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i64> zeroinitializer, [[TMP1]]
-; CHECK-NEXT: br i1 false, label %[[BB3]], label %[[BB2:.*]]
-; CHECK: [[BB3]]:
-; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i64> [ [[TMP2]], %[[BB1]] ], [ poison, %[[IF_THEN_I_I]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i64> zeroinitializer, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> <i64 0, i64 0, i64 poison, i64 poison>, <4 x i64> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: br i1 false, label %[[BB5]], label %[[BB2:.*]]
+; CHECK: [[BB5]]:
+; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x i64> [ [[TMP4]], %[[BB1]] ], [ poison, %[[IF_THEN_I_I]] ]
; CHECK-NEXT: br label %[[BB2]]
; CHECK: [[BB2]]:
-; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i64> [ [[TMP4]], %[[BB3]] ], [ [[TMP2]], %[[BB1]] ]
+; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i64> [ [[TMP6]], %[[BB5]] ], [ [[TMP4]], %[[BB1]] ]
; CHECK-NEXT: store <4 x i64> [[TMP7]], ptr getelementptr inbounds nuw (i8, ptr null, i64 40), align 8
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr47642.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr47642.ll
index 782aada17acac..a4949bc67b0f1 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr47642.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr47642.ll
@@ -6,9 +6,14 @@ target triple = "x86_64-unknown-linux-gnu"
define <4 x i32> @foo(<4 x i32> %x, i32 %f) {
; CHECK-LABEL: @foo(
-; CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x i32> poison, i32 [[F:%.*]], i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VECINIT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[VECINIT51:%.*]] = add <4 x i32> [[TMP2]], <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x i32> undef, i32 [[F:%.*]], i32 0
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[F]], 1
+; CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[ADD]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[F]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], <i32 2, i32 3>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[VECINIT51:%.*]] = shufflevector <4 x i32> [[VECINIT1]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x i32> [[VECINIT51]]
;
%vecinit = insertelement <4 x i32> undef, i32 %f, i32 0
diff --git a/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll b/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll
index 125c2dce32663..ad4daeab003f5 100644
--- a/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll
+++ b/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll
@@ -150,9 +150,9 @@ define <2 x i32> @replace_through_int_casts_ele0_only(i16 %inp, <2 x i16> %dead)
define <2 x i8> @replace_through_binop_fail_cant_speculate(i8 %inp, <2 x i8> %d, <2 x i8> %any) {
; CHECK-LABEL: define <2 x i8> @replace_through_binop_fail_cant_speculate(
; CHECK-SAME: i8 [[INP:%.*]], <2 x i8> [[D:%.*]], <2 x i8> [[ANY:%.*]]) {
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i8> poison, i8 [[INP]], i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i8> [[TMP3]], <2 x i8> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[V:%.*]] = add <2 x i8> [[TMP2]], <i8 0, i8 5>
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[INP]], 5
+; CHECK-NEXT: [[V0:%.*]] = insertelement <2 x i8> poison, i8 [[INP]], i64 0
+; CHECK-NEXT: [[V:%.*]] = insertelement <2 x i8> [[V0]], i8 [[ADD]], i64 1
; CHECK-NEXT: [[DIV0:%.*]] = sdiv <2 x i8> splat (i8 -128), [[V]]
; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[INP]], 123
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i8> [[DIV0]], i8 [[TMP1]], i64 0
More information about the llvm-commits mailing list