[llvm] [SLP][NFC]Extract values state/operands analysis into separate class (PR #138724)
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Wed May 7 11:14:34 PDT 2025
https://github.com/alexey-bataev updated https://github.com/llvm/llvm-project/pull/138724
From d63112e972262b6ced76d57f46d952b85a3da957 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Tue, 6 May 2025 17:22:39 +0000
Subject: [PATCH 1/2] [𝘀𝗽𝗿] initial version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.5
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 419 ++++++++++++------
1 file changed, 273 insertions(+), 146 deletions(-)
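Reviewer note: the patch splits operand construction into two steps -- InstructionsCompatibilityAnalysis::buildOperands produces one ValueList per operand index up front, and TreeEntry::setOperands simply consumes those lists, so the per-opcode gathering no longer lives inside TreeEntry. A rough stand-alone C++ sketch of that shape (the types below are simplified stand-ins, not the LLVM classes):

// Conceptual sketch only; CompatibilityAnalysis and TreeEntrySketch are
// made-up stand-ins for the classes touched by this patch.
#include <cstddef>
#include <string>
#include <vector>

using ValueList = std::vector<std::string>;

// Stand-in for InstructionsCompatibilityAnalysis: derives the per-operand
// value lists for a bundle once, up front.
struct CompatibilityAnalysis {
  std::vector<ValueList>
  buildOperands(const std::vector<ValueList> &PerLaneOps) {
    // Transpose per-lane operand lists into per-operand lane lists.
    std::vector<ValueList> Operands(PerLaneOps.front().size(),
                                    ValueList(PerLaneOps.size()));
    for (std::size_t Lane = 0; Lane < PerLaneOps.size(); ++Lane)
      for (std::size_t Op = 0; Op < PerLaneOps[Lane].size(); ++Op)
        Operands[Op][Lane] = PerLaneOps[Lane][Op];
    return Operands;
  }
};

// Stand-in for TreeEntry: it now just consumes the prebuilt operand lists.
struct TreeEntrySketch {
  std::vector<ValueList> Operands;
  void setOperands(const std::vector<ValueList> &Ops) { Operands = Ops; }
};

int main() {
  // Two lanes of a binary op: lane 0 computes a0 op b0, lane 1 computes a1 op b1.
  std::vector<ValueList> PerLane = {{"a0", "b0"}, {"a1", "b1"}};
  CompatibilityAnalysis Analysis;
  TreeEntrySketch TE;
  TE.setOperands(Analysis.buildOperands(PerLane));
  // TE.Operands is now {{a0, a1}, {b0, b1}}: one value list per operand index.
  return 0;
}

In the real code the per-operand lists additionally handle poison lanes, converted opcodes and intrinsic scalar arguments; the sketch only shows the transposition from per-lane operands to per-operand lanes.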
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index a6ae26f2f0e1a..d0602762afc2f 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2855,9 +2855,13 @@ class BoUpSLP {
}
/// Go through the instructions in VL and append their operands.
- void appendOperandsOfVL(ArrayRef<Value *> VL, const InstructionsState &S) {
- assert(!VL.empty() && "Bad VL");
- assert((empty() || VL.size() == getNumLanes()) &&
+ void appendOperands(ArrayRef<Value *> VL, ArrayRef<ValueList> Operands,
+ const InstructionsState &S) {
+ assert(!Operands.empty() && !VL.empty() && "Bad list of operands");
+ assert((empty() || all_of(Operands,
+ [this](const ValueList &VL) {
+ return VL.size() == getNumLanes();
+ })) &&
"Expected same number of lanes");
assert(S.valid() && "InstructionsState is invalid.");
// IntrinsicInst::isCommutative returns true if swapping the first "two"
@@ -2866,26 +2870,11 @@ class BoUpSLP {
Instruction *MainOp = S.getMainOp();
unsigned NumOperands = MainOp->getNumOperands();
ArgSize = isa<IntrinsicInst>(MainOp) ? IntrinsicNumOperands : NumOperands;
- OpsVec.resize(NumOperands);
+ OpsVec.resize(ArgSize);
unsigned NumLanes = VL.size();
for (OperandDataVec &Ops : OpsVec)
Ops.resize(NumLanes);
for (unsigned Lane : seq<unsigned>(NumLanes)) {
- Value *V = VL[Lane];
- assert((isa<Instruction>(V) || isa<PoisonValue>(V)) &&
- "Expected instruction or poison value");
- if (isa<PoisonValue>(V)) {
- for (unsigned OpIdx : seq<unsigned>(NumOperands))
- OpsVec[OpIdx][Lane] = {
- PoisonValue::get(MainOp->getOperand(OpIdx)->getType()), true,
- false};
- if (auto *EI = dyn_cast<ExtractElementInst>(MainOp)) {
- OpsVec[0][Lane] = {EI->getVectorOperand(), true, false};
- } else if (auto *EV = dyn_cast<ExtractValueInst>(MainOp)) {
- OpsVec[0][Lane] = {EV->getAggregateOperand(), true, false};
- }
- continue;
- }
// Our tree has just 3 nodes: the root and two operands.
// It is therefore trivial to get the APO. We only need to check the
// opcode of V and whether the operand at OpIdx is the LHS or RHS
@@ -2896,11 +2885,17 @@ class BoUpSLP {
// Since operand reordering is performed on groups of commutative
// operations or alternating sequences (e.g., +, -), we can safely tell
// the inverse operations by checking commutativity.
- auto [SelectedOp, Ops] = convertTo(cast<Instruction>(VL[Lane]), S);
+ auto *I = dyn_cast<Instruction>(VL[Lane]);
+ if (!I && isa<PoisonValue>(VL[Lane])) {
+ for (unsigned OpIdx : seq<unsigned>(NumOperands))
+ OpsVec[OpIdx][Lane] = {Operands[OpIdx][Lane], true, false};
+ continue;
+ }
+ auto [SelectedOp, Ops] = convertTo(I, S);
bool IsInverseOperation = !isCommutative(SelectedOp);
- for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
+ for (unsigned OpIdx : seq<unsigned>(ArgSize)) {
bool APO = (OpIdx == 0) ? false : IsInverseOperation;
- OpsVec[OpIdx][Lane] = {Ops[OpIdx], APO, false};
+ OpsVec[OpIdx][Lane] = {Operands[OpIdx][Lane], APO, false};
}
}
}
@@ -3006,12 +3001,12 @@ class BoUpSLP {
public:
/// Initialize with all the operands of the instruction vector \p RootVL.
- VLOperands(ArrayRef<Value *> RootVL, const InstructionsState &S,
- const BoUpSLP &R)
+ VLOperands(ArrayRef<Value *> RootVL, ArrayRef<ValueList> Operands,
+ const InstructionsState &S, const BoUpSLP &R)
: TLI(*R.TLI), DL(*R.DL), SE(*R.SE), R(R),
L(R.LI->getLoopFor(S.getMainOp()->getParent())) {
// Append all the operands of RootVL.
- appendOperandsOfVL(RootVL, S);
+ appendOperands(RootVL, Operands, S);
}
/// \Returns a value vector with the operands across all lanes for the
@@ -3821,12 +3816,6 @@ class BoUpSLP {
/// Interleaving factor for interleaved loads Vectorize nodes.
unsigned InterleaveFactor = 0;
- public:
- /// Returns interleave factor for interleave nodes.
- unsigned getInterleaveFactor() const { return InterleaveFactor; }
- /// Sets interleaving factor for the interleaving nodes.
- void setInterleave(unsigned Factor) { InterleaveFactor = Factor; }
-
/// Set this bundle's \p OpIdx'th operand to \p OpVL.
void setOperand(unsigned OpIdx, ArrayRef<Value *> OpVL) {
if (Operands.size() < OpIdx + 1)
@@ -3838,13 +3827,16 @@ class BoUpSLP {
copy(OpVL, Operands[OpIdx].begin());
}
- /// Set this bundle's operand from Scalars.
- void setOperand(const BoUpSLP &R, bool RequireReorder = false) {
- VLOperands Ops(Scalars, S, R);
- if (RequireReorder)
- Ops.reorder();
- for (unsigned I : seq<unsigned>(S.getMainOp()->getNumOperands()))
- setOperand(I, Ops.getVL(I));
+ public:
+ /// Returns interleave factor for interleave nodes.
+ unsigned getInterleaveFactor() const { return InterleaveFactor; }
+ /// Sets interleaving factor for the interleaving nodes.
+ void setInterleave(unsigned Factor) { InterleaveFactor = Factor; }
+
+ /// Set this bundle's operands from \p Operands.
+ void setOperands(ArrayRef<ValueList> Operands) {
+ for (unsigned I : seq<unsigned>(Operands.size()))
+ setOperand(I, Operands[I]);
}
/// Reorders operands of the node to the given mask \p Mask.
@@ -4840,12 +4832,11 @@ class BoUpSLP {
// where their second (immediate) operand is not added. Since
// immediates do not affect scheduler behavior this is considered
// okay.
- assert(
- In &&
- (isa<ExtractValueInst, ExtractElementInst, IntrinsicInst>(In) ||
- In->getNumOperands() ==
- Bundle->getTreeEntry()->getNumOperands()) &&
- "Missed TreeEntry operands?");
+ assert(In &&
+ (isa<ExtractValueInst, ExtractElementInst, CallBase>(In) ||
+ In->getNumOperands() ==
+ Bundle->getTreeEntry()->getNumOperands()) &&
+ "Missed TreeEntry operands?");
for (unsigned OpIdx :
seq<unsigned>(Bundle->getTreeEntry()->getNumOperands()))
@@ -9734,6 +9725,190 @@ bool BoUpSLP::canBuildSplitNode(ArrayRef<Value *> VL,
return true;
}
+namespace {
+/// Class that accepts an incoming list of values and generates the list of
+/// values for scheduling and the lists of operands for the new nodes.
+class InstructionsCompatibilityAnalysis {
+ DominatorTree &DT;
+ const DataLayout &DL;
+ const TargetTransformInfo &TTI;
+ const TargetLibraryInfo &TLI;
+
+ /// Builds operands for the original instructions.
+ void
+ buildOriginalOperands(const InstructionsState &S, ArrayRef<Value *> VL,
+ SmallVectorImpl<BoUpSLP::ValueList> &Operands) const {
+
+ unsigned ShuffleOrOp =
+ S.isAltShuffle() ? (unsigned)Instruction::ShuffleVector : S.getOpcode();
+ Instruction *VL0 = S.getMainOp();
+
+ switch (ShuffleOrOp) {
+ case Instruction::PHI: {
+ auto *PH = cast<PHINode>(VL0);
+
+ // Keeps the reordered operands to avoid code duplication.
+ PHIHandler Handler(DT, PH, VL);
+ Handler.buildOperands();
+ Operands.assign(PH->getNumOperands(), {});
+ for (unsigned I : seq<unsigned>(PH->getNumOperands()))
+ Operands[I].assign(Handler.getOperands(I).begin(),
+ Handler.getOperands(I).end());
+ return;
+ }
+ case Instruction::ExtractValue:
+ case Instruction::ExtractElement:
+ // This is a special case, as it does not gather, but at the same time
+    // we are not extending buildTreeRec() towards the operands.
+ Operands.assign(1, {VL.size(), VL0->getOperand(0)});
+ return;
+ case Instruction::InsertElement:
+ Operands.assign(2, {VL.size(), nullptr});
+ for (auto [Idx, V] : enumerate(VL)) {
+ auto *IE = cast<InsertElementInst>(V);
+ for (auto [OpIdx, Ops] : enumerate(Operands))
+ Ops[Idx] = IE->getOperand(OpIdx);
+ }
+ return;
+ case Instruction::Load:
+ Operands.assign(
+ 1, {VL.size(),
+ PoisonValue::get(cast<LoadInst>(VL0)->getPointerOperandType())});
+ for (auto [V, Op] : zip(VL, Operands.back())) {
+ auto *LI = dyn_cast<LoadInst>(V);
+ if (!LI)
+ continue;
+ Op = LI->getPointerOperand();
+ }
+ return;
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast:
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ case Instruction::Select:
+ case Instruction::FNeg:
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Freeze:
+ case Instruction::Store:
+ case Instruction::ShuffleVector:
+ Operands.assign(VL0->getNumOperands(), {VL.size(), nullptr});
+ for (auto [Idx, V] : enumerate(VL)) {
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I) {
+ for (auto [OpIdx, Ops] : enumerate(Operands))
+ Ops[Idx] = PoisonValue::get(VL0->getOperand(OpIdx)->getType());
+ continue;
+ }
+ auto [Op, ConvertedOps] = convertTo(I, S);
+ for (auto [OpIdx, Ops] : enumerate(Operands))
+ Ops[Idx] = ConvertedOps[OpIdx];
+ }
+ return;
+ case Instruction::GetElementPtr: {
+ Operands.assign(2, {VL.size(), nullptr});
+ // Need to cast all indices to the same type before vectorization to
+ // avoid crash.
+ // Required to be able to find correct matches between different gather
+ // nodes and reuse the vectorized values rather than trying to gather them
+ // again.
+ const unsigned IndexIdx = 1;
+ Type *VL0Ty = VL0->getOperand(IndexIdx)->getType();
+ Type *Ty = all_of(VL,
+ [VL0Ty](Value *V) {
+ auto *GEP = dyn_cast<GetElementPtrInst>(V);
+ if (!GEP)
+ return true;
+ return VL0Ty == GEP->getOperand(IndexIdx)->getType();
+ })
+ ? VL0Ty
+ : DL.getIndexType(cast<GetElementPtrInst>(VL0)
+ ->getPointerOperandType()
+ ->getScalarType());
+ for (auto [Idx, V] : enumerate(VL)) {
+ auto *GEP = dyn_cast<GetElementPtrInst>(V);
+ if (!GEP) {
+ Operands[0][Idx] = V;
+ Operands[1][Idx] = ConstantInt::getNullValue(Ty);
+ continue;
+ }
+ Operands[0][Idx] = GEP->getPointerOperand();
+ auto *Op = GEP->getOperand(IndexIdx);
+ auto *CI = dyn_cast<ConstantInt>(Op);
+ Operands[1][Idx] = CI ? ConstantFoldIntegerCast(
+ CI, Ty, CI->getValue().isSignBitSet(), DL)
+ : Op;
+ }
+ return;
+ }
+ case Instruction::Call: {
+ auto *CI = cast<CallInst>(VL0);
+ Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, &TLI);
+ for (unsigned Idx : seq<unsigned>(CI->arg_size())) {
+ if (isVectorIntrinsicWithScalarOpAtArg(ID, Idx, &TTI))
+ continue;
+ auto &Ops = Operands.emplace_back();
+ for (Value *V : VL) {
+ auto *I = dyn_cast<Instruction>(V);
+ Ops.push_back(I ? I->getOperand(Idx)
+ : PoisonValue::get(VL0->getOperand(Idx)->getType()));
+ }
+ }
+ return;
+ }
+ default:
+ break;
+ }
+ llvm_unreachable("Unexpected vectorization of the instructions.");
+ }
+
+public:
+ InstructionsCompatibilityAnalysis(DominatorTree &DT, const DataLayout &DL,
+ const TargetTransformInfo &TTI,
+ const TargetLibraryInfo &TLI)
+ : DT(DT), DL(DL), TTI(TTI), TLI(TLI) {}
+
+ InstructionsState buildInstructionsState(ArrayRef<Value *> VL) {
+ InstructionsState S = getSameOpcode(VL, TLI);
+ return S;
+ }
+
+ SmallVector<BoUpSLP::ValueList> buildOperands(const InstructionsState &S,
+ ArrayRef<Value *> VL) {
+ assert(S && "Invalid state!");
+ SmallVector<BoUpSLP::ValueList> Operands;
+ buildOriginalOperands(S, VL, Operands);
+ return Operands;
+ }
+};
+} // namespace
+
bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
const EdgeInfo &UserTreeIdx,
InstructionsState &S,
@@ -9741,9 +9916,10 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
bool &TrySplitVectorize) const {
assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
- S = getSameOpcode(VL, *TLI);
TryToFindDuplicates = true;
TrySplitVectorize = false;
+ InstructionsCompatibilityAnalysis Analysis(*DT, *DL, *TTI, *TLI);
+ S = Analysis.buildInstructionsState(VL);
// Don't go into catchswitch blocks, which can happen with PHIs.
// Such blocks can only have PHIs and the catchswitch. There is no
@@ -10118,6 +10294,8 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
registerNonVectorizableLoads(ArrayRef(VL));
return;
}
+ InstructionsCompatibilityAnalysis Analysis(*DT, *DL, *TTI, *TLI);
+ SmallVector<ValueList> Operands = Analysis.buildOperands(S, VL);
ScheduleBundle Empty;
ScheduleBundle &Bundle = BundlePtr.value() ? *BundlePtr.value() : Empty;
LLVM_DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n");
@@ -10142,21 +10320,12 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
};
switch (ShuffleOrOp) {
case Instruction::PHI: {
- auto *PH = cast<PHINode>(VL0);
-
TreeEntry *TE =
newTreeEntry(VL, Bundle, S, UserTreeIdx, ReuseShuffleIndices);
LLVM_DEBUG(dbgs() << "SLP: added a new TreeEntry (PHINode).\n";
TE->dump());
- // Keeps the reordered operands to avoid code duplication.
- PHIHandler Handler(*DT, PH, VL);
- Handler.buildOperands();
- for (unsigned I : seq<unsigned>(PH->getNumOperands()))
- TE->setOperand(I, Handler.getOperands(I));
- SmallVector<ArrayRef<Value *>> Operands(PH->getNumOperands());
- for (unsigned I : seq<unsigned>(PH->getNumOperands()))
- Operands[I] = Handler.getOperands(I);
+ TE->setOperands(Operands);
CreateOperandNodes(TE, Operands);
return;
}
@@ -10183,7 +10352,7 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
TE->dump());
// This is a special case, as it does not gather, but at the same time
// we are not extending buildTreeRec() towards the operands.
- TE->setOperand(*this);
+ TE->setOperands(Operands);
return;
}
case Instruction::InsertElement: {
@@ -10214,7 +10383,7 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
LLVM_DEBUG(dbgs() << "SLP: added a new TreeEntry (InsertElementInst).\n";
TE->dump());
- TE->setOperand(*this);
+ TE->setOperands(Operands);
buildTreeRec(TE->getOperand(1), Depth + 1, {TE, 1});
return;
}
@@ -10269,7 +10438,7 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
case TreeEntry::NeedToGather:
llvm_unreachable("Unexpected loads state.");
}
- TE->setOperand(*this);
+ TE->setOperands(Operands);
if (State == TreeEntry::ScatterVectorize)
buildTreeRec(PointerOps, Depth + 1, {TE, 0});
return;
@@ -10310,7 +10479,7 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
LLVM_DEBUG(dbgs() << "SLP: added a new TreeEntry (CastInst).\n";
TE->dump());
- TE->setOperand(*this);
+ TE->setOperands(Operands);
for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
buildTreeRec(TE->getOperand(I), Depth + 1, {TE, I});
if (ShuffleOrOp == Instruction::Trunc) {
@@ -10338,37 +10507,28 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
LLVM_DEBUG(dbgs() << "SLP: added a new TreeEntry (CmpInst).\n";
TE->dump());
- ValueList Left, Right;
- VLOperands Ops(VL, S, *this);
+ VLOperands Ops(VL, Operands, S, *this);
if (cast<CmpInst>(VL0)->isCommutative()) {
// Commutative predicate - collect + sort operands of the instructions
// so that each side is more likely to have the same opcode.
assert(P0 == CmpInst::getSwappedPredicate(P0) &&
"Commutative Predicate mismatch");
Ops.reorder();
- Left = Ops.getVL(0);
- Right = Ops.getVL(1);
+ Operands.front() = Ops.getVL(0);
+ Operands.back() = Ops.getVL(1);
} else {
// Collect operands - commute if it uses the swapped predicate.
- for (Value *V : VL) {
- if (isa<PoisonValue>(V)) {
- Left.push_back(PoisonValue::get(VL0->getOperand(0)->getType()));
- Right.push_back(PoisonValue::get(VL0->getOperand(1)->getType()));
+ for (auto [Idx, V] : enumerate(VL)) {
+ if (isa<PoisonValue>(V))
continue;
- }
auto *Cmp = cast<CmpInst>(V);
- Value *LHS = Cmp->getOperand(0);
- Value *RHS = Cmp->getOperand(1);
if (Cmp->getPredicate() != P0)
- std::swap(LHS, RHS);
- Left.push_back(LHS);
- Right.push_back(RHS);
+ std::swap(Operands.front()[Idx], Operands.back()[Idx]);
}
}
- TE->setOperand(0, Left);
- TE->setOperand(1, Right);
- buildTreeRec(Left, Depth + 1, {TE, 0});
- buildTreeRec(Right, Depth + 1, {TE, 1});
+ TE->setOperands(Operands);
+ buildTreeRec(Operands.front(), Depth + 1, {TE, 0});
+ buildTreeRec(Operands.back(), Depth + 1, {TE, 1});
if (ShuffleOrOp == Instruction::ICmp) {
unsigned NumSignBits0 =
ComputeNumSignBits(VL0->getOperand(0), *DL, 0, AC, nullptr, DT);
@@ -10411,7 +10571,13 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
"(SelectInst/UnaryOperator/BinaryOperator/FreezeInst).\n";
TE->dump());
- TE->setOperand(*this, isa<BinaryOperator>(VL0) && isCommutative(VL0));
+ if (isa<BinaryOperator>(VL0) && isCommutative(VL0)) {
+ VLOperands Ops(VL, Operands, S, *this);
+ Ops.reorder();
+ Operands[0] = Ops.getVL(0);
+ Operands[1] = Ops.getVL(1);
+ }
+ TE->setOperands(Operands);
for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
buildTreeRec(TE->getOperand(I), Depth + 1, {TE, I});
return;
@@ -10421,52 +10587,7 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
ReuseShuffleIndices);
LLVM_DEBUG(dbgs() << "SLP: added a new TreeEntry (GetElementPtrInst).\n";
TE->dump());
- SmallVector<ValueList, 2> Operands(2);
- // Prepare the operand vector for pointer operands.
- for (Value *V : VL) {
- auto *GEP = dyn_cast<GetElementPtrInst>(V);
- if (!GEP) {
- Operands.front().push_back(V);
- continue;
- }
- Operands.front().push_back(GEP->getPointerOperand());
- }
- TE->setOperand(0, Operands.front());
- // Need to cast all indices to the same type before vectorization to
- // avoid crash.
- // Required to be able to find correct matches between different gather
- // nodes and reuse the vectorized values rather than trying to gather them
- // again.
- int IndexIdx = 1;
- Type *VL0Ty = VL0->getOperand(IndexIdx)->getType();
- Type *Ty = all_of(VL,
- [VL0Ty, IndexIdx](Value *V) {
- auto *GEP = dyn_cast<GetElementPtrInst>(V);
- if (!GEP)
- return true;
- return VL0Ty == GEP->getOperand(IndexIdx)->getType();
- })
- ? VL0Ty
- : DL->getIndexType(cast<GetElementPtrInst>(VL0)
- ->getPointerOperandType()
- ->getScalarType());
- // Prepare the operand vector.
- for (Value *V : VL) {
- auto *I = dyn_cast<GetElementPtrInst>(V);
- if (!I) {
- Operands.back().push_back(
- ConstantInt::get(Ty, 0, /*isSigned=*/false));
- continue;
- }
- auto *Op = I->getOperand(IndexIdx);
- auto *CI = dyn_cast<ConstantInt>(Op);
- if (!CI)
- Operands.back().push_back(Op);
- else
- Operands.back().push_back(ConstantFoldIntegerCast(
- CI, Ty, CI->getValue().isSignBitSet(), *DL));
- }
- TE->setOperand(IndexIdx, Operands.back());
+ TE->setOperands(Operands);
for (unsigned I = 0, Ops = Operands.size(); I < Ops; ++I)
buildTreeRec(Operands[I], Depth + 1, {TE, I});
@@ -10485,7 +10606,7 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
LLVM_DEBUG(
dbgs() << "SLP: added a new TreeEntry (jumbled StoreInst).\n";
TE->dump());
- TE->setOperand(*this);
+ TE->setOperands(Operands);
buildTreeRec(TE->getOperand(0), Depth + 1, {TE, 0});
return;
}
@@ -10499,7 +10620,13 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
ReuseShuffleIndices);
LLVM_DEBUG(dbgs() << "SLP: added a new TreeEntry (CallInst).\n";
TE->dump());
- TE->setOperand(*this, isCommutative(VL0));
+ if (isCommutative(VL0)) {
+ VLOperands Ops(VL, Operands, S, *this);
+ Ops.reorder();
+ Operands[0] = Ops.getVL(0);
+ Operands[1] = Ops.getVL(1);
+ }
+ TE->setOperands(Operands);
for (unsigned I : seq<unsigned>(CI->arg_size())) {
// For scalar operands no need to create an entry since no need to
// vectorize it.
@@ -10533,37 +10660,34 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
CmpInst::Predicate AltP = AltCI->getPredicate();
assert(MainP != AltP &&
"Expected different main/alternate predicates.");
- ValueList Left, Right;
// Collect operands - commute if it uses the swapped predicate or
// alternate operation.
- for (Value *V : VL) {
- if (isa<PoisonValue>(V)) {
- Left.push_back(PoisonValue::get(MainCI->getOperand(0)->getType()));
- Right.push_back(PoisonValue::get(MainCI->getOperand(1)->getType()));
+ for (auto [Idx, V] : enumerate(VL)) {
+ if (isa<PoisonValue>(V))
continue;
- }
auto *Cmp = cast<CmpInst>(V);
- Value *LHS = Cmp->getOperand(0);
- Value *RHS = Cmp->getOperand(1);
if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
- std::swap(LHS, RHS);
+ std::swap(Operands.front()[Idx], Operands.back()[Idx]);
} else {
if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
- std::swap(LHS, RHS);
+ std::swap(Operands.front()[Idx], Operands.back()[Idx]);
}
- Left.push_back(LHS);
- Right.push_back(RHS);
}
- TE->setOperand(0, Left);
- TE->setOperand(1, Right);
- buildTreeRec(Left, Depth + 1, {TE, 0});
- buildTreeRec(Right, Depth + 1, {TE, 1});
+ TE->setOperands(Operands);
+ buildTreeRec(Operands.front(), Depth + 1, {TE, 0});
+ buildTreeRec(Operands.back(), Depth + 1, {TE, 1});
return;
}
- TE->setOperand(*this, isa<BinaryOperator>(VL0) || CI);
+ if (isa<BinaryOperator>(VL0) || CI) {
+ VLOperands Ops(VL, Operands, S, *this);
+ Ops.reorder();
+ Operands[0] = Ops.getVL(0);
+ Operands[1] = Ops.getVL(1);
+ }
+ TE->setOperands(Operands);
for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
buildTreeRec(TE->getOperand(I), Depth + 1, {TE, I});
return;
@@ -12755,7 +12879,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
const BoUpSLP::TreeEntry *BoUpSLP::getOperandEntry(const TreeEntry *E,
unsigned Idx) const {
ArrayRef<Value *> VL = E->getOperand(Idx);
- InstructionsState S = getSameOpcode(VL, *TLI);
+ InstructionsCompatibilityAnalysis Analysis(*DT, *DL, *TTI, *TLI);
+ InstructionsState S = Analysis.buildInstructionsState(VL);
// Special processing for GEPs bundle, which may include non-gep values.
if (!S && VL.front()->getType()->isPointerTy()) {
const auto *It = find_if(VL, IsaPred<GetElementPtrInst>);
@@ -16768,7 +16893,8 @@ BoUpSLP::getMatchedVectorizedOperand(const TreeEntry *E, unsigned NodeIdx,
Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx) {
ValueList &VL = E->getOperand(NodeIdx);
- InstructionsState S = getSameOpcode(VL, *TLI);
+ InstructionsCompatibilityAnalysis Analysis(*DT, *DL, *TTI, *TLI);
+ InstructionsState S = Analysis.buildInstructionsState(VL);
// Special processing for GEPs bundle, which may include non-gep values.
if (!S && VL.front()->getType()->isPointerTy()) {
const auto *It = find_if(VL, IsaPred<GetElementPtrInst>);
@@ -20865,7 +20991,8 @@ SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
for (Value *V : Chain)
ValOps.insert(cast<StoreInst>(V)->getValueOperand());
// Operands are not same/alt opcodes or non-power-of-2 uniques - exit.
- InstructionsState S = getSameOpcode(ValOps.getArrayRef(), *TLI);
+ InstructionsCompatibilityAnalysis Analysis(*DT, *DL, *TTI, *TLI);
+ InstructionsState S = Analysis.buildInstructionsState(ValOps.getArrayRef());
if (all_of(ValOps, IsaPred<Instruction>) && ValOps.size() > 1) {
DenseSet<Value *> Stores(Chain.begin(), Chain.end());
bool IsAllowedSize =
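Reviewer note on the appendOperands change above: with the prebuilt Operands lists, appendOperands mostly just attaches the APO flag, i.e. marks lanes whose operation is the non-commutative inverse of the main opcode. A self-contained toy sketch (buildOpsVec and the Lane struct are made up for illustration; this is not the SLPVectorizer code):

// Conceptual sketch only -- not the SLPVectorizer implementation.
#include <cassert>
#include <cstdio>
#include <string>
#include <vector>

// A toy stand-in for one scalar in the bundle: its opcode and whether that
// opcode is commutative (e.g. "add" is, "sub" is not).
struct Lane {
  std::string Opcode;
  bool IsCommutative;
};

// Mirrors the {value, APO} part of the per-operand, per-lane data.
struct OperandData {
  std::string Value;
  bool APO = false; // true if this lane's operation is the inverse operation.
};

// Operands[OpIdx][Lane] is pre-built by the analysis; this routine only
// attaches the APO flag, in the spirit of appendOperands().
std::vector<std::vector<OperandData>>
buildOpsVec(const std::vector<Lane> &VL,
            const std::vector<std::vector<std::string>> &Operands) {
  const unsigned NumOperands = Operands.size();
  const unsigned NumLanes = VL.size();
  std::vector<std::vector<OperandData>> OpsVec(
      NumOperands, std::vector<OperandData>(NumLanes));
  for (unsigned LaneIdx = 0; LaneIdx < NumLanes; ++LaneIdx) {
    // The first operand never carries the APO flag; the others do whenever
    // the lane's operation is not commutative (the "inverse" operation).
    bool IsInverse = !VL[LaneIdx].IsCommutative;
    for (unsigned OpIdx = 0; OpIdx < NumOperands; ++OpIdx) {
      assert(Operands[OpIdx].size() == NumLanes && "same number of lanes");
      OpsVec[OpIdx][LaneIdx] = {Operands[OpIdx][LaneIdx],
                                OpIdx == 0 ? false : IsInverse};
    }
  }
  return OpsVec;
}

int main() {
  // Bundle of alternating add/sub: {a0+b0, a1-b1, a2+b2, a3-b3}.
  std::vector<Lane> VL = {{"add", true}, {"sub", false},
                          {"add", true}, {"sub", false}};
  std::vector<std::vector<std::string>> Operands = {
      {"a0", "a1", "a2", "a3"}, {"b0", "b1", "b2", "b3"}};
  auto OpsVec = buildOpsVec(VL, Operands);
  for (unsigned OpIdx = 0; OpIdx < OpsVec.size(); ++OpIdx)
    for (unsigned L = 0; L < OpsVec[OpIdx].size(); ++L)
      std::printf("Op%u Lane%u: %s APO=%d\n", OpIdx, L,
                  OpsVec[OpIdx][L].Value.c_str(), OpsVec[OpIdx][L].APO);
  return 0;
}

Running it prints APO=0 for every lane of operand 0 and APO=1 only for the "sub" lanes of operand 1, which mirrors the `OpIdx == 0 ? false : IsInverseOperation` computation in the patch.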
From b0279ad088d5e3fe215643eaee9b6713fb6dad07 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Wed, 7 May 2025 18:14:25 +0000
Subject: [PATCH 2/2] address comments
Created using spr 1.3.5
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 21 +++++++++----------
1 file changed, 10 insertions(+), 11 deletions(-)
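Reviewer note: this revision only reformats the GEP index-type selection, so as a reminder of what that selection does, here is a heavily simplified stand-alone sketch. It models each index operand by the bit width of its type; pickCommonIndexWidth and the fixed 64-bit fallback are made-up stand-ins, not LLVM APIs:

// Conceptual sketch only -- models each GEP index operand by the bit width
// of its type; helper names are invented for illustration.
#include <algorithm>
#include <cstdio>
#include <vector>

// Pick the common index width for the bundle: the first lane's width if every
// GEP already uses it, otherwise the target's pointer index width (standing in
// for DL.getIndexType(...)).
unsigned pickCommonIndexWidth(const std::vector<unsigned> &IndexWidths,
                              unsigned DLIndexWidth) {
  unsigned VL0Width = IndexWidths.front();
  bool AllSame =
      std::all_of(IndexWidths.begin(), IndexWidths.end(),
                  [VL0Width](unsigned W) { return W == VL0Width; });
  return AllSame ? VL0Width : DLIndexWidth;
}

int main() {
  // Three GEPs in a bundle: two index with i32, one with i64. The bundle
  // falls back to the 64-bit index type and the i32 indices must be cast.
  std::vector<unsigned> IndexWidths = {32, 64, 32};
  unsigned Common = pickCommonIndexWidth(IndexWidths, /*DLIndexWidth=*/64);
  for (unsigned W : IndexWidths)
    std::printf("i%u index %s\n", W,
                W == Common ? "kept as is" : "cast to the common index type");
  std::printf("common index type: i%u\n", Common);
  return 0;
}

In the patch the fallback is DL.getIndexType() on the GEP pointer operand's scalar type, and constant indices are then folded to that type with ConstantFoldIntegerCast before vectorization.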
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index d0602762afc2f..ef12adf4b54c2 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9840,17 +9840,16 @@ class InstructionsCompatibilityAnalysis {
// again.
const unsigned IndexIdx = 1;
Type *VL0Ty = VL0->getOperand(IndexIdx)->getType();
- Type *Ty = all_of(VL,
- [VL0Ty](Value *V) {
- auto *GEP = dyn_cast<GetElementPtrInst>(V);
- if (!GEP)
- return true;
- return VL0Ty == GEP->getOperand(IndexIdx)->getType();
- })
- ? VL0Ty
- : DL.getIndexType(cast<GetElementPtrInst>(VL0)
- ->getPointerOperandType()
- ->getScalarType());
+ Type *Ty =
+ all_of(VL,
+ [VL0Ty](Value *V) {
+ auto *GEP = dyn_cast<GetElementPtrInst>(V);
+ return !GEP || VL0Ty == GEP->getOperand(IndexIdx)->getType();
+ })
+ ? VL0Ty
+ : DL.getIndexType(cast<GetElementPtrInst>(VL0)
+ ->getPointerOperandType()
+ ->getScalarType());
for (auto [Idx, V] : enumerate(VL)) {
auto *GEP = dyn_cast<GetElementPtrInst>(V);
if (!GEP) {