[llvm] 6307e4b - Revert "[SLP] NFC. Replace TreeEntry::setOperandsInOrder with VLOperands. (#113880)"
Nikita Popov via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 6 05:27:15 PST 2024
Author: Nikita Popov
Date: 2024-12-06T14:27:03+01:00
New Revision: 6307e4b31efee4b5a396da1df2e0939ab9009f11
URL: https://github.com/llvm/llvm-project/commit/6307e4b31efee4b5a396da1df2e0939ab9009f11
DIFF: https://github.com/llvm/llvm-project/commit/6307e4b31efee4b5a396da1df2e0939ab9009f11.diff
LOG: Revert "[SLP] NFC. Replace TreeEntry::setOperandsInOrder with VLOperands. (#113880)"
This reverts commit 94fbe7e3ae7c0ce4e9a7d801e7700457a36f731d.
Causes a crash when linking mafft in ReleaseLTO-g config.
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index eb7ab924200699..51841a842ce0b0 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2017,9 +2017,6 @@ class BoUpSLP {
/// A vector of operand vectors.
SmallVector<OperandDataVec, 4> OpsVec;
- /// When VL[0] is IntrinsicInst, ArgSize is CallBase::arg_size. When VL[0]
- /// is not IntrinsicInst, ArgSize is User::getNumOperands.
- unsigned ArgSize = 0;
const TargetLibraryInfo &TLI;
const DataLayout &DL;
@@ -2407,12 +2404,10 @@ class BoUpSLP {
assert(!VL.empty() && "Bad VL");
assert((empty() || VL.size() == getNumLanes()) &&
"Expected same number of lanes");
- // IntrinsicInst::isCommutative returns true if swapping the first "two"
- // arguments to the intrinsic produces the same result.
constexpr unsigned IntrinsicNumOperands = 2;
auto *VL0 = cast<Instruction>(*find_if(VL, IsaPred<Instruction>));
- unsigned NumOperands = VL0->getNumOperands();
- ArgSize = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands : NumOperands;
+ unsigned NumOperands = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands
+ : VL0->getNumOperands();
OpsVec.resize(NumOperands);
unsigned NumLanes = VL.size();
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
@@ -2445,7 +2440,7 @@ class BoUpSLP {
}
/// \returns the number of operands.
- unsigned getNumOperands() const { return ArgSize; }
+ unsigned getNumOperands() const { return OpsVec.size(); }
/// \returns the number of lanes.
unsigned getNumLanes() const { return OpsVec[0].size(); }
@@ -2622,8 +2617,7 @@ class BoUpSLP {
ArrayRef<OperandData> Op0 = OpsVec.front();
for (const OperandData &Data : Op0)
UniqueValues.insert(Data.V);
- for (ArrayRef<OperandData> Op :
- ArrayRef(OpsVec).slice(1, getNumOperands() - 1)) {
+ for (ArrayRef<OperandData> Op : drop_begin(OpsVec, 1)) {
if (any_of(Op, [&UniqueValues](const OperandData &Data) {
return !UniqueValues.contains(Data.V);
}))
@@ -3144,6 +3138,13 @@ class BoUpSLP {
SmallVector<SmallVector<std::pair<LoadInst *, int>>>,
8> &GatheredLoads);
+ /// Reorder commutative or alt operands to get better probability of
+ /// generating vectorized code.
+ static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
+ SmallVectorImpl<Value *> &Left,
+ SmallVectorImpl<Value *> &Right,
+ const BoUpSLP &R);
+
/// Helper for `findExternalStoreUsersReorderIndices()`. It iterates over the
/// users of \p TE and collects the stores. It returns the map from the store
/// pointers to the collected stores.
@@ -3338,15 +3339,27 @@ class BoUpSLP {
copy(OpVL, Operands[OpIdx].begin());
}
- /// Set this bundle's operand from \p VL.
- void setOperand(ArrayRef<Value *> VL, const BoUpSLP &R,
- bool RequireReorder = false) {
- VLOperands Ops(VL, R);
- if (RequireReorder)
- Ops.reorder();
- for (unsigned I :
- seq<unsigned>(cast<Instruction>(VL[0])->getNumOperands()))
- setOperand(I, Ops.getVL(I));
+ /// Set the operands of this bundle in their original order.
+ void setOperandsInOrder() {
+ assert(Operands.empty() && "Already initialized?");
+ auto *I0 = cast<Instruction>(*find_if(Scalars, IsaPred<Instruction>));
+ Operands.resize(I0->getNumOperands());
+ unsigned NumLanes = Scalars.size();
+ for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
+ OpIdx != NumOperands; ++OpIdx) {
+ Operands[OpIdx].resize(NumLanes);
+ for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+ if (isa<PoisonValue>(Scalars[Lane])) {
+ Operands[OpIdx][Lane] =
+ PoisonValue::get(I0->getOperand(OpIdx)->getType());
+ continue;
+ }
+ auto *I = cast<Instruction>(Scalars[Lane]);
+ assert(I->getNumOperands() == NumOperands &&
+ "Expected same number of operands");
+ Operands[OpIdx][Lane] = I->getOperand(OpIdx);
+ }
+ }
}
/// Reorders operands of the node to the given mask \p Mask.
@@ -8446,7 +8459,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
{}, CurrentOrder);
LLVM_DEBUG(dbgs() << "SLP: added inserts bundle.\n");
- TE->setOperand(VL, *this);
+ TE->setOperandsInOrder();
buildTree_rec(TE->getOperand(1), Depth + 1, {TE, 1});
return;
}
@@ -8467,26 +8480,27 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
else
LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
+ TE->setOperandsInOrder();
break;
case TreeEntry::StridedVectorize:
// Vectorizing non-consecutive loads with `llvm.masked.gather`.
TE = newTreeEntry(VL, TreeEntry::StridedVectorize, Bundle, S,
UserTreeIdx, ReuseShuffleIndices, CurrentOrder);
+ TE->setOperandsInOrder();
LLVM_DEBUG(dbgs() << "SLP: added a vector of strided loads.\n");
break;
case TreeEntry::ScatterVectorize:
// Vectorizing non-consecutive loads with `llvm.masked.gather`.
TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle, S,
UserTreeIdx, ReuseShuffleIndices);
+ TE->setOperandsInOrder();
+ buildTree_rec(PointerOps, Depth + 1, {TE, 0});
LLVM_DEBUG(dbgs() << "SLP: added a vector of non-consecutive loads.\n");
break;
case TreeEntry::CombinedVectorize:
case TreeEntry::NeedToGather:
llvm_unreachable("Unexpected loads state.");
}
- TE->setOperand(VL, *this);
- if (State == TreeEntry::ScatterVectorize)
- buildTree_rec(PointerOps, Depth + 1, {TE, 0});
return;
}
case Instruction::ZExt:
@@ -8524,8 +8538,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
ReuseShuffleIndices);
LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
- TE->setOperand(VL, *this);
- for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
+ TE->setOperandsInOrder();
+ for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
if (ShuffleOrOp == Instruction::Trunc) {
ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx);
@@ -8552,15 +8566,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
ValueList Left, Right;
- VLOperands Ops(VL, *this);
if (cast<CmpInst>(VL0)->isCommutative()) {
// Commutative predicate - collect + sort operands of the instructions
// so that each side is more likely to have the same opcode.
assert(P0 == CmpInst::getSwappedPredicate(P0) &&
"Commutative Predicate mismatch");
- Ops.reorder();
- Left = Ops.getVL(0);
- Right = Ops.getVL(1);
+ reorderInputsAccordingToOpcode(VL, Left, Right, *this);
} else {
// Collect operands - commute if it uses the swapped predicate.
for (Value *V : VL) {
@@ -8621,8 +8632,20 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
ReuseShuffleIndices);
LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
- TE->setOperand(VL, *this, isa<BinaryOperator>(VL0) && isCommutative(VL0));
- for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
+ // Sort operands of the instructions so that each side is more likely to
+ // have the same opcode.
+ if (isa<BinaryOperator>(VL0) && isCommutative(VL0)) {
+ ValueList Left, Right;
+ reorderInputsAccordingToOpcode(VL, Left, Right, *this);
+ TE->setOperand(0, Left);
+ TE->setOperand(1, Right);
+ buildTree_rec(Left, Depth + 1, {TE, 0});
+ buildTree_rec(Right, Depth + 1, {TE, 1});
+ return;
+ }
+
+ TE->setOperandsInOrder();
+ for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
return;
}
@@ -8687,7 +8710,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
fixupOrderingIndices(CurrentOrder);
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices, CurrentOrder);
- TE->setOperand(VL, *this);
+ TE->setOperandsInOrder();
buildTree_rec(TE->getOperand(0), Depth + 1, {TE, 0});
if (Consecutive)
LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
@@ -8703,13 +8726,46 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices);
- TE->setOperand(VL, *this, isCommutative(VL0));
- for (unsigned I : seq<unsigned>(CI->arg_size())) {
+ // Sort operands of the instructions so that each side is more likely to
+ // have the same opcode.
+ if (isCommutative(VL0)) {
+ ValueList Left, Right;
+ reorderInputsAccordingToOpcode(VL, Left, Right, *this);
+ TE->setOperand(0, Left);
+ TE->setOperand(1, Right);
+ SmallVector<ValueList> Operands;
+ for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
+ Operands.emplace_back();
+ if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
+ continue;
+ for (Value *V : VL) {
+ auto *CI2 = cast<CallInst>(V);
+ Operands.back().push_back(CI2->getArgOperand(I));
+ }
+ TE->setOperand(I, Operands.back());
+ }
+ buildTree_rec(Left, Depth + 1, {TE, 0});
+ buildTree_rec(Right, Depth + 1, {TE, 1});
+ for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
+ if (Operands[I - 2].empty())
+ continue;
+ buildTree_rec(Operands[I - 2], Depth + 1, {TE, I});
+ }
+ return;
+ }
+ TE->setOperandsInOrder();
+ for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
// For scalar operands no need to create an entry since no need to
// vectorize it.
if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
continue;
- buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
+ ValueList Operands;
+ // Prepare the operand vector.
+ for (Value *V : VL) {
+ auto *CI2 = cast<CallInst>(V);
+ Operands.push_back(CI2->getArgOperand(I));
+ }
+ buildTree_rec(Operands, Depth + 1, {TE, I});
}
return;
}
@@ -8720,37 +8776,43 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Reorder operands if reordering would enable vectorization.
auto *CI = dyn_cast<CmpInst>(VL0);
- if (CI && any_of(VL, [](Value *V) {
- return !isa<PoisonValue>(V) && !cast<CmpInst>(V)->isCommutative();
- })) {
- auto *MainCI = cast<CmpInst>(S.MainOp);
- auto *AltCI = cast<CmpInst>(S.AltOp);
- CmpInst::Predicate MainP = MainCI->getPredicate();
- CmpInst::Predicate AltP = AltCI->getPredicate();
- assert(MainP != AltP &&
- "Expected different main/alternate predicates.");
+ if (isa<BinaryOperator>(VL0) || CI) {
ValueList Left, Right;
- // Collect operands - commute if it uses the swapped predicate or
- // alternate operation.
- for (Value *V : VL) {
- if (isa<PoisonValue>(V)) {
- Left.push_back(PoisonValue::get(MainCI->getOperand(0)->getType()));
- Right.push_back(PoisonValue::get(MainCI->getOperand(1)->getType()));
- continue;
- }
- auto *Cmp = cast<CmpInst>(V);
- Value *LHS = Cmp->getOperand(0);
- Value *RHS = Cmp->getOperand(1);
+ if (!CI || all_of(VL, [](Value *V) {
+ return isa<PoisonValue>(V) || cast<CmpInst>(V)->isCommutative();
+ })) {
+ reorderInputsAccordingToOpcode(VL, Left, Right, *this);
+ } else {
+ auto *MainCI = cast<CmpInst>(S.MainOp);
+ auto *AltCI = cast<CmpInst>(S.AltOp);
+ CmpInst::Predicate MainP = MainCI->getPredicate();
+ CmpInst::Predicate AltP = AltCI->getPredicate();
+ assert(MainP != AltP &&
+ "Expected different main/alternate predicates.");
+ // Collect operands - commute if it uses the swapped predicate or
+ // alternate operation.
+ for (Value *V : VL) {
+ if (isa<PoisonValue>(V)) {
+ Left.push_back(
+ PoisonValue::get(MainCI->getOperand(0)->getType()));
+ Right.push_back(
+ PoisonValue::get(MainCI->getOperand(1)->getType()));
+ continue;
+ }
+ auto *Cmp = cast<CmpInst>(V);
+ Value *LHS = Cmp->getOperand(0);
+ Value *RHS = Cmp->getOperand(1);
- if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
- if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
- std::swap(LHS, RHS);
- } else {
- if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
- std::swap(LHS, RHS);
+ if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
+ if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
+ std::swap(LHS, RHS);
+ } else {
+ if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
+ std::swap(LHS, RHS);
+ }
+ Left.push_back(LHS);
+ Right.push_back(RHS);
}
- Left.push_back(LHS);
- Right.push_back(RHS);
}
TE->setOperand(0, Left);
TE->setOperand(1, Right);
@@ -8759,8 +8821,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
return;
}
- TE->setOperand(VL, *this, isa<BinaryOperator>(VL0) || CI);
- for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
+ TE->setOperandsInOrder();
+ for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
return;
}
@@ -13465,6 +13527,21 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
return Cost;
}
+// Perform operand reordering on the instructions in VL and return the reordered
+// operands in Left and Right.
+void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
+ SmallVectorImpl<Value *> &Left,
+ SmallVectorImpl<Value *> &Right,
+ const BoUpSLP &R) {
+ if (VL.empty())
+ return;
+ VLOperands Ops(VL, R);
+ // Reorder the operands in place.
+ Ops.reorder();
+ Left = Ops.getVL(0);
+ Right = Ops.getVL(1);
+}
+
Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
auto &Res = EntryToLastInstruction.try_emplace(E).first->second;
if (Res)
More information about the llvm-commits
mailing list