[llvm] [SLP] NFC. Make InstructionsState more constant. (PR #118609)
Han-Kuan Chen via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 9 17:54:31 PST 2024
https://github.com/HanKuanChen updated https://github.com/llvm/llvm-project/pull/118609
>From 75e28717d202d434669a1df7d0e7244679453e9a Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Wed, 4 Dec 2024 00:10:46 -0800
Subject: [PATCH 1/7] [SLP] NFC. Make InstructionsState more constant.
Add getOpValue, getMainOp and getAltOp.
Use `const InstructionsState &` instead of `InstructionsState &`.
Use `!S.isAltShuffle()` instead of `S.MainOp == S.AltOp`.
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 132 ++++++++++--------
1 file changed, 73 insertions(+), 59 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 33657c26356d65..ea79f24e2b86df 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -807,7 +807,7 @@ static std::optional<unsigned> getExtractIndex(Instruction *E) {
namespace {
/// Main data required for vectorization of instructions.
-struct InstructionsState {
+class InstructionsState {
/// The very first instruction in the list with the main opcode.
Value *OpValue = nullptr;
@@ -815,6 +815,13 @@ struct InstructionsState {
Instruction *MainOp = nullptr;
Instruction *AltOp = nullptr;
+public:
+ Value *getOpValue() const { return OpValue; }
+
+ Instruction *getMainOp() const { return MainOp; }
+
+ Instruction *getAltOp() const { return AltOp; }
+
/// The main/alternate opcodes for the list of instructions.
unsigned getOpcode() const {
return MainOp ? MainOp->getOpcode() : 0;
@@ -1842,12 +1849,12 @@ class BoUpSLP {
// Note: Only consider instructions with <= 2 operands to avoid
// complexity explosion.
if (S.getOpcode() &&
- (S.MainOp->getNumOperands() <= 2 || !MainAltOps.empty() ||
+ (S.getMainOp()->getNumOperands() <= 2 || !MainAltOps.empty() ||
!S.isAltShuffle()) &&
all_of(Ops, [&S](Value *V) {
return isa<PoisonValue>(V) ||
cast<Instruction>(V)->getNumOperands() ==
- S.MainOp->getNumOperands();
+ S.getMainOp()->getNumOperands();
}))
return S.isAltShuffle() ? LookAheadHeuristics::ScoreAltOpcodes
: LookAheadHeuristics::ScoreSameOpcode;
@@ -3410,8 +3417,8 @@ class BoUpSLP {
}
void setOperations(const InstructionsState &S) {
- MainOp = S.MainOp;
- AltOp = S.AltOp;
+ MainOp = S.getMainOp();
+ AltOp = S.getAltOp();
}
Instruction *getMainOp() const {
@@ -3649,8 +3656,8 @@ class BoUpSLP {
}
// Update the scheduler bundle to point to this TreeEntry.
ScheduleData *BundleMember = *Bundle;
- assert((BundleMember || isa<PHINode>(S.MainOp) ||
- isVectorLikeInstWithConstOps(S.MainOp) ||
+ assert((BundleMember || isa<PHINode>(S.getMainOp()) ||
+ isVectorLikeInstWithConstOps(S.getMainOp()) ||
doesNotNeedToSchedule(VL)) &&
"Bundle and VL out of sync");
if (BundleMember) {
@@ -3717,9 +3724,11 @@ class BoUpSLP {
/// Checks if the specified list of the instructions/values can be vectorized
/// and fills required data before actual scheduling of the instructions.
- TreeEntry::EntryState getScalarsVectorizationState(
- InstructionsState &S, ArrayRef<Value *> VL, bool IsScatterVectorizeUserTE,
- OrdersType &CurrentOrder, SmallVectorImpl<Value *> &PointerOps);
+ TreeEntry::EntryState
+ getScalarsVectorizationState(const InstructionsState &S, ArrayRef<Value *> VL,
+ bool IsScatterVectorizeUserTE,
+ OrdersType &CurrentOrder,
+ SmallVectorImpl<Value *> &PointerOps);
/// Maps a specific scalar to its tree entry.
SmallDenseMap<Value *, TreeEntry *> ScalarToTreeEntry;
@@ -7426,17 +7435,17 @@ bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S,
unsigned Opcode1 = S.getAltOpcode();
SmallBitVector OpcodeMask(getAltInstrMask(VL, Opcode0, Opcode1));
// If this pattern is supported by the target then consider it profitable.
- if (TTI->isLegalAltInstr(getWidenedType(S.MainOp->getType(), VL.size()),
+ if (TTI->isLegalAltInstr(getWidenedType(S.getMainOp()->getType(), VL.size()),
Opcode0, Opcode1, OpcodeMask))
return true;
SmallVector<ValueList> Operands;
- for (unsigned I : seq<unsigned>(0, S.MainOp->getNumOperands())) {
+ for (unsigned I : seq<unsigned>(0, S.getMainOp()->getNumOperands())) {
Operands.emplace_back();
// Prepare the operand vector.
for (Value *V : VL) {
if (isa<PoisonValue>(V)) {
Operands.back().push_back(
- PoisonValue::get(S.MainOp->getOperand(I)->getType()));
+ PoisonValue::get(S.getMainOp()->getOperand(I)->getType()));
continue;
}
Operands.back().push_back(cast<Instruction>(V)->getOperand(I));
@@ -7486,7 +7495,7 @@ bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S,
++ExtraShuffleInsts;
}
}
- const Loop *L = LI->getLoopFor(S.MainOp->getParent());
+ const Loop *L = LI->getLoopFor(S.getMainOp()->getParent());
// Vectorize node, if:
// 1. at least single operand is constant or splat.
// 2. Operands have many loop invariants (the instructions are not loop
@@ -7496,7 +7505,7 @@ bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S,
[&](ArrayRef<Value *> Op) {
if (allConstant(Op) ||
(!isSplat(Op) && allSameBlock(Op) && allSameType(Op) &&
- getSameOpcode(Op, *TLI).MainOp))
+ getSameOpcode(Op, *TLI).getMainOp()))
return false;
DenseMap<Value *, unsigned> Uniques;
for (Value *V : Op) {
@@ -7528,19 +7537,21 @@ bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S,
// vector operands is number of vector instructions + number of vector
// instructions for operands (buildvectors). Number of buildvector
// instructions is just number_of_operands * number_of_scalars.
- (UndefCnt < (VL.size() - 1) * S.MainOp->getNumOperands() &&
+ (UndefCnt < (VL.size() - 1) * S.getMainOp()->getNumOperands() &&
(UniqueOpcodes.size() + NonInstCnt + ExtraShuffleInsts +
- NumAltInsts) < S.MainOp->getNumOperands() * VL.size());
+ NumAltInsts) < S.getMainOp()->getNumOperands() * VL.size());
}
BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
- InstructionsState &S, ArrayRef<Value *> VL, bool IsScatterVectorizeUserTE,
- OrdersType &CurrentOrder, SmallVectorImpl<Value *> &PointerOps) {
- assert(S.MainOp && "Expected instructions with same/alternate opcodes only.");
+ const InstructionsState &S, ArrayRef<Value *> VL,
+ bool IsScatterVectorizeUserTE, OrdersType &CurrentOrder,
+ SmallVectorImpl<Value *> &PointerOps) {
+ assert(S.getMainOp() &&
+ "Expected instructions with same/alternate opcodes only.");
unsigned ShuffleOrOp =
S.isAltShuffle() ? (unsigned)Instruction::ShuffleVector : S.getOpcode();
- auto *VL0 = cast<Instruction>(S.OpValue);
+ auto *VL0 = cast<Instruction>(S.getOpValue());
switch (ShuffleOrOp) {
case Instruction::PHI: {
// Too many operands - gather, most probably won't be vectorized.
@@ -7712,7 +7723,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
case Instruction::Or:
case Instruction::Xor:
case Instruction::Freeze:
- if (S.MainOp->getType()->isFloatingPointTy() &&
+ if (S.getMainOp()->getType()->isFloatingPointTy() &&
TTI->isFPVectorizationPotentiallyUnsafe() && any_of(VL, [](Value *V) {
auto *I = dyn_cast<Instruction>(V);
return I && I->isBinaryOp() && !I->isFast();
@@ -7809,7 +7820,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
return TreeEntry::NeedToGather;
}
case Instruction::Call: {
- if (S.MainOp->getType()->isFloatingPointTy() &&
+ if (S.getMainOp()->getType()->isFloatingPointTy() &&
TTI->isFPVectorizationPotentiallyUnsafe() && any_of(VL, [](Value *V) {
auto *I = dyn_cast<Instruction>(V);
return I && !I->isFast();
@@ -8035,7 +8046,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
return isa<UndefValue>(V) || !isConstant(V);
}))) {
if (DoNotFail && UniquePositions.size() > 1 &&
- NumUniqueScalarValues > 1 && S.MainOp->isSafeToRemove() &&
+ NumUniqueScalarValues > 1 && S.getMainOp()->isSafeToRemove() &&
all_of(UniqueValues, IsaPred<Instruction, PoisonValue>)) {
// Find the number of elements, which forms full vectors.
unsigned PWSz = getFullVectorNumberOfElements(
@@ -8065,8 +8076,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Don't go into catchswitch blocks, which can happen with PHIs.
// Such blocks can only have PHIs and the catchswitch. There is no
// place to insert a shuffle if we need to, so just avoid that issue.
- if (S.MainOp &&
- isa<CatchSwitchInst>(S.MainOp->getParent()->getTerminator())) {
+ if (S.getMainOp() &&
+ isa<CatchSwitchInst>(S.getMainOp()->getParent()->getTerminator())) {
LLVM_DEBUG(dbgs() << "SLP: bundle in catchswitch block.\n");
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
return;
@@ -8074,10 +8085,11 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Check if this is a duplicate of another entry.
if (S.getOpcode()) {
- if (TreeEntry *E = getTreeEntry(S.OpValue)) {
- LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n");
+ if (TreeEntry *E = getTreeEntry(S.getOpValue())) {
+ LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.getOpValue()
+ << ".\n");
if (GatheredLoadsEntriesFirst.has_value() || !E->isSame(VL)) {
- auto It = MultiNodeScalars.find(S.OpValue);
+ auto It = MultiNodeScalars.find(S.getOpValue());
if (It != MultiNodeScalars.end()) {
auto *TEIt = find_if(It->getSecond(),
[&](TreeEntry *ME) { return ME->isSame(VL); });
@@ -8090,7 +8102,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
}
if (!E) {
- if (!doesNotNeedToBeScheduled(S.OpValue)) {
+ if (!doesNotNeedToBeScheduled(S.getOpValue())) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
if (TryToFindDuplicates(S))
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
@@ -8098,8 +8110,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
return;
}
SmallPtrSet<const TreeEntry *, 4> Nodes;
- Nodes.insert(getTreeEntry(S.OpValue));
- for (const TreeEntry *E : MultiNodeScalars.lookup(S.OpValue))
+ Nodes.insert(getTreeEntry(S.getOpValue()));
+ for (const TreeEntry *E : MultiNodeScalars.lookup(S.getOpValue()))
Nodes.insert(E);
SmallPtrSet<Value *, 8> Values(VL.begin(), VL.end());
if (any_of(Nodes, [&](const TreeEntry *E) {
@@ -8122,7 +8134,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// used to properly draw the graph rather than for the actual
// vectorization.
E->UserTreeIndices.push_back(UserTreeIdx);
- LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue
+ LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.getOpValue()
<< ".\n");
return;
}
@@ -8133,13 +8145,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// a load), in which case peek through to include it in the tree, without
// ballooning over-budget.
if (Depth >= RecursionMaxDepth &&
- !(S.MainOp && isa<Instruction>(S.MainOp) && S.MainOp == S.AltOp &&
+ !(S.getMainOp() && isa<Instruction>(S.getMainOp()) && !S.isAltShuffle() &&
VL.size() >= 4 &&
- (match(S.MainOp, m_Load(m_Value())) || all_of(VL, [&S](const Value *I) {
+ (match(S.getMainOp(), m_Load(m_Value())) ||
+ all_of(VL, [&S](const Value *I) {
return match(I,
m_OneUse(m_ZExtOrSExt(m_OneUse(m_Load(m_Value()))))) &&
cast<Instruction>(I)->getOpcode() ==
- cast<Instruction>(S.MainOp)->getOpcode();
+ cast<Instruction>(S.getMainOp())->getOpcode();
})))) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
if (TryToFindDuplicates(S))
@@ -8151,7 +8164,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Don't handle scalable vectors
if (S.getOpcode() == Instruction::ExtractElement &&
isa<ScalableVectorType>(
- cast<ExtractElementInst>(S.OpValue)->getVectorOperandType())) {
+ cast<ExtractElementInst>(S.getOpValue())->getVectorOperandType())) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
if (TryToFindDuplicates(S))
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
@@ -8188,7 +8201,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
return isa<Instruction>(Op) || isVectorLikeInstWithConstOps(Op);
}));
}
- bool IsCommutative = isCommutative(S.MainOp) || isCommutative(S.AltOp);
+ bool IsCommutative =
+ isCommutative(S.getMainOp()) || isCommutative(S.getAltOp());
if ((IsCommutative &&
std::accumulate(InstsCount.begin(), InstsCount.end(), 0) < 2) ||
(!IsCommutative &&
@@ -8198,20 +8212,20 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
SmallVector<SmallVector<std::pair<Value *, Value *>>> Candidates;
auto *I1 = cast<Instruction>(VL.front());
auto *I2 = cast<Instruction>(VL.back());
- for (int Op = 0, E = S.MainOp->getNumOperands(); Op < E; ++Op)
+ for (int Op = 0, E = S.getMainOp()->getNumOperands(); Op < E; ++Op)
Candidates.emplace_back().emplace_back(I1->getOperand(Op),
I2->getOperand(Op));
if (static_cast<unsigned>(count_if(
Candidates, [this](ArrayRef<std::pair<Value *, Value *>> Cand) {
return findBestRootPair(Cand, LookAheadHeuristics::ScoreSplat);
- })) >= S.MainOp->getNumOperands() / 2)
+ })) >= S.getMainOp()->getNumOperands() / 2)
return false;
- if (S.MainOp->getNumOperands() > 2)
+ if (S.getMainOp()->getNumOperands() > 2)
return true;
if (IsCommutative) {
// Check permuted operands.
Candidates.clear();
- for (int Op = 0, E = S.MainOp->getNumOperands(); Op < E; ++Op)
+ for (int Op = 0, E = S.getMainOp()->getNumOperands(); Op < E; ++Op)
Candidates.emplace_back().emplace_back(I1->getOperand(Op),
I2->getOperand((Op + 1) % E));
if (any_of(
@@ -8246,7 +8260,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
bool AreAllSameInsts = AreAllSameBlock || AreScatterAllGEPSameBlock;
if (!AreAllSameInsts || (!S.getOpcode() && allConstant(VL)) || isSplat(VL) ||
(isa_and_present<InsertElementInst, ExtractValueInst, ExtractElementInst>(
- S.OpValue) &&
+ S.getOpValue()) &&
!all_of(VL, isVectorLikeInstWithConstOps)) ||
NotProfitableForVectorization(VL)) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O, small shuffle. \n");
@@ -8313,10 +8327,10 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Check that all of the users of the scalars that we want to vectorize are
// schedulable.
- auto *VL0 = cast<Instruction>(S.OpValue);
+ auto *VL0 = cast<Instruction>(S.getOpValue());
BB = VL0->getParent();
- if (S.MainOp &&
+ if (S.getMainOp() &&
(BB->isEHPad() || isa_and_nonnull<UnreachableInst>(BB->getTerminator()) ||
!DT->isReachableFromEntry(BB))) {
// Don't go into unreachable blocks. They may contain instructions with
@@ -8783,8 +8797,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
})) {
reorderInputsAccordingToOpcode(VL, Left, Right, *this);
} else {
- auto *MainCI = cast<CmpInst>(S.MainOp);
- auto *AltCI = cast<CmpInst>(S.AltOp);
+ auto *MainCI = cast<CmpInst>(S.getMainOp());
+ auto *AltCI = cast<CmpInst>(S.getAltOp());
CmpInst::Predicate MainP = MainCI->getPredicate();
CmpInst::Predicate AltP = AltCI->getPredicate();
assert(MainP != AltP &&
@@ -9707,7 +9721,7 @@ void BoUpSLP::transformNodes() {
auto CheckOperandsProfitability = [this](Instruction *I1, Instruction *I2,
const InstructionsState &S) {
SmallVector<SmallVector<std::pair<Value *, Value *>>> Candidates;
- for (unsigned Op : seq<unsigned>(S.MainOp->getNumOperands()))
+ for (unsigned Op : seq<unsigned>(S.getMainOp()->getNumOperands()))
Candidates.emplace_back().emplace_back(I1->getOperand(Op),
I2->getOperand(Op));
return all_of(
@@ -9791,7 +9805,7 @@ void BoUpSLP::transformNodes() {
// Try to vectorize reduced values or if all users are vectorized.
// For expensive instructions extra extracts might be profitable.
if ((!UserIgnoreList || E.Idx != 0) &&
- TTI->getInstructionCost(S.MainOp, CostKind) <
+ TTI->getInstructionCost(S.getMainOp(), CostKind) <
TTI::TCC_Expensive &&
!all_of(Slice, [&](Value *V) {
if (isa<PoisonValue>(V))
@@ -9818,10 +9832,10 @@ void BoUpSLP::transformNodes() {
continue;
}
} else if (S.getOpcode() == Instruction::ExtractElement ||
- (TTI->getInstructionCost(S.MainOp, CostKind) <
+ (TTI->getInstructionCost(S.getMainOp(), CostKind) <
TTI::TCC_Expensive &&
!CheckOperandsProfitability(
- S.MainOp,
+ S.getMainOp(),
cast<Instruction>(*find_if(reverse(Slice),
IsaPred<Instruction>)),
S))) {
@@ -14481,10 +14495,10 @@ BoUpSLP::TreeEntry *BoUpSLP::getMatchedVectorizedOperand(const TreeEntry *E,
VE->isSame(TE->Scalars);
}));
};
- TreeEntry *VE = getTreeEntry(S.OpValue);
+ TreeEntry *VE = getTreeEntry(S.getOpValue());
if (VE && CheckSameVE(VE))
return VE;
- auto It = MultiNodeScalars.find(S.OpValue);
+ auto It = MultiNodeScalars.find(S.getOpValue());
if (It != MultiNodeScalars.end()) {
auto *I = find_if(It->getSecond(), [&](const TreeEntry *TE) {
return TE != VE && CheckSameVE(TE);
@@ -16966,13 +16980,13 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
const InstructionsState &S) {
// No need to schedule PHIs, insertelement, extractelement and extractvalue
// instructions.
- if (isa<PHINode>(S.OpValue) || isVectorLikeInstWithConstOps(S.OpValue) ||
- doesNotNeedToSchedule(VL))
+ if (isa<PHINode>(S.getOpValue()) ||
+ isVectorLikeInstWithConstOps(S.getOpValue()) || doesNotNeedToSchedule(VL))
return nullptr;
// Initialize the instruction bundle.
Instruction *OldScheduleEnd = ScheduleEnd;
- LLVM_DEBUG(dbgs() << "SLP: bundle: " << *S.OpValue << "\n");
+ LLVM_DEBUG(dbgs() << "SLP: bundle: " << *S.getOpValue() << "\n");
auto TryScheduleBundleImpl = [this, OldScheduleEnd, SLP](bool ReSchedule,
ScheduleData *Bundle) {
@@ -17053,7 +17067,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
auto *Bundle = buildBundle(VL);
TryScheduleBundleImpl(ReSchedule, Bundle);
if (!Bundle->isReady()) {
- cancelScheduling(VL, S.OpValue);
+ cancelScheduling(VL, S.getOpValue());
return std::nullopt;
}
return Bundle;
@@ -18432,7 +18446,7 @@ SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
(VectorizeNonPowerOf2 && has_single_bit(ValOps.size() + 1));
if ((!IsAllowedSize && S.getOpcode() &&
S.getOpcode() != Instruction::Load &&
- (!S.MainOp->isSafeToRemove() ||
+ (!S.getMainOp()->isSafeToRemove() ||
any_of(ValOps.getArrayRef(),
[&](Value *V) {
return !isa<ExtractElementInst>(V) &&
@@ -18969,7 +18983,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
if (!S.getOpcode())
return false;
- Instruction *I0 = cast<Instruction>(S.OpValue);
+ Instruction *I0 = cast<Instruction>(S.getOpValue());
// Make sure invalid types (including vector type) are rejected before
// determining vectorization factor for scalar instructions.
for (Value *V : VL) {
>From f8cc2ed26ccc1c222624ad227b1828ec0012865c Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Fri, 6 Dec 2024 00:27:20 -0800
Subject: [PATCH 2/7] apply comment
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index ea79f24e2b86df..708aed16e1377b 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7439,7 +7439,7 @@ bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S,
Opcode0, Opcode1, OpcodeMask))
return true;
SmallVector<ValueList> Operands;
- for (unsigned I : seq<unsigned>(0, S.getMainOp()->getNumOperands())) {
+ for (unsigned I : seq<unsigned>(S.getMainOp()->getNumOperands())) {
Operands.emplace_back();
// Prepare the operand vector.
for (Value *V : VL) {
@@ -8212,7 +8212,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
SmallVector<SmallVector<std::pair<Value *, Value *>>> Candidates;
auto *I1 = cast<Instruction>(VL.front());
auto *I2 = cast<Instruction>(VL.back());
- for (int Op = 0, E = S.getMainOp()->getNumOperands(); Op < E; ++Op)
+ for (int Op : seq<int>(S.getMainOp()->getNumOperands()))
Candidates.emplace_back().emplace_back(I1->getOperand(Op),
I2->getOperand(Op));
if (static_cast<unsigned>(count_if(
@@ -8225,7 +8225,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (IsCommutative) {
// Check permuted operands.
Candidates.clear();
- for (int Op = 0, E = S.getMainOp()->getNumOperands(); Op < E; ++Op)
+ for (int Op : seq<int>(S.getMainOp()->getNumOperands()))
Candidates.emplace_back().emplace_back(I1->getOperand(Op),
I2->getOperand((Op + 1) % E));
if (any_of(
@@ -18983,7 +18983,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
if (!S.getOpcode())
return false;
- Instruction *I0 = cast<Instruction>(S.getOpValue());
+ auto *I0 = cast<Instruction>(S.getOpValue());
// Make sure invalid types (including vector type) are rejected before
// determining vectorization factor for scalar instructions.
for (Value *V : VL) {
>From b39e031eb9639ea883729711dc14ac02f3ffdf16 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Fri, 6 Dec 2024 00:42:30 -0800
Subject: [PATCH 3/7] fix conflict
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 41 +------------------
1 file changed, 2 insertions(+), 39 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 35ed279414e2a0..37e8ddc47c240f 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -8737,49 +8737,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (CI && any_of(VL, [](Value *V) {
return !isa<PoisonValue>(V) && !cast<CmpInst>(V)->isCommutative();
})) {
- auto *MainCI = cast<CmpInst>(S.MainOp);
- auto *AltCI = cast<CmpInst>(S.AltOp);
+ auto *MainCI = cast<CmpInst>(S.getMainOp());
+ auto *AltCI = cast<CmpInst>(S.getAltOp());
CmpInst::Predicate MainP = MainCI->getPredicate();
CmpInst::Predicate AltP = AltCI->getPredicate();
assert(MainP != AltP &&
"Expected different main/alternate predicates.");
ValueList Left, Right;
-<<<<<<< HEAD
- if (!CI || all_of(VL, [](Value *V) {
- return isa<PoisonValue>(V) || cast<CmpInst>(V)->isCommutative();
- })) {
- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
- } else {
- auto *MainCI = cast<CmpInst>(S.getMainOp());
- auto *AltCI = cast<CmpInst>(S.getAltOp());
- CmpInst::Predicate MainP = MainCI->getPredicate();
- CmpInst::Predicate AltP = AltCI->getPredicate();
- assert(MainP != AltP &&
- "Expected different main/alternate predicates.");
- // Collect operands - commute if it uses the swapped predicate or
- // alternate operation.
- for (Value *V : VL) {
- if (isa<PoisonValue>(V)) {
- Left.push_back(
- PoisonValue::get(MainCI->getOperand(0)->getType()));
- Right.push_back(
- PoisonValue::get(MainCI->getOperand(1)->getType()));
- continue;
- }
- auto *Cmp = cast<CmpInst>(V);
- Value *LHS = Cmp->getOperand(0);
- Value *RHS = Cmp->getOperand(1);
-
- if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
- if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
- std::swap(LHS, RHS);
- } else {
- if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
- std::swap(LHS, RHS);
- }
- Left.push_back(LHS);
- Right.push_back(RHS);
-=======
// Collect operands - commute if it uses the swapped predicate or
// alternate operation.
for (Value *V : VL) {
@@ -8787,7 +8751,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
Left.push_back(PoisonValue::get(MainCI->getOperand(0)->getType()));
Right.push_back(PoisonValue::get(MainCI->getOperand(1)->getType()));
continue;
->>>>>>> upstream/main
}
auto *Cmp = cast<CmpInst>(V);
Value *LHS = Cmp->getOperand(0);
>From 673092800ecea2b96d348710b5cb997af9afe8c8 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Fri, 6 Dec 2024 00:42:51 -0800
Subject: [PATCH 4/7] E is used later
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 37e8ddc47c240f..ada01038c8d0d9 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -8212,7 +8212,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (IsCommutative) {
// Check permuted operands.
Candidates.clear();
- for (int Op : seq<int>(S.getMainOp()->getNumOperands()))
+ for (int Op = 0, E = S.getMainOp()->getNumOperands(); Op < E; ++Op)
Candidates.emplace_back().emplace_back(I1->getOperand(Op),
I2->getOperand((Op + 1) % E));
if (any_of(
>From 630619238f26b0986c67bdf32827f495ee117231 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Sun, 8 Dec 2024 22:09:59 -0800
Subject: [PATCH 5/7] fix conflict
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 16 ++--------------
1 file changed, 2 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 99cbb7e0193e9c..5fc09dddc7d27e 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -8790,27 +8790,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Reorder operands if reordering would enable vectorization.
auto *CI = dyn_cast<CmpInst>(VL0);
-<<<<<<< HEAD
- if (CI && any_of(VL, [](Value *V) {
- return !isa<PoisonValue>(V) && !cast<CmpInst>(V)->isCommutative();
- })) {
- auto *MainCI = cast<CmpInst>(S.getMainOp());
- auto *AltCI = cast<CmpInst>(S.getAltOp());
- CmpInst::Predicate MainP = MainCI->getPredicate();
- CmpInst::Predicate AltP = AltCI->getPredicate();
- assert(MainP != AltP &&
- "Expected different main/alternate predicates.");
-=======
if (isa<BinaryOperator>(VL0) || CI) {
->>>>>>> upstream/main
ValueList Left, Right;
if (!CI || all_of(VL, [](Value *V) {
return isa<PoisonValue>(V) || cast<CmpInst>(V)->isCommutative();
})) {
reorderInputsAccordingToOpcode(VL, Left, Right, *this);
} else {
- auto *MainCI = cast<CmpInst>(S.MainOp);
- auto *AltCI = cast<CmpInst>(S.AltOp);
+ auto *MainCI = cast<CmpInst>(S.getMainOp());
+ auto *AltCI = cast<CmpInst>(S.getAltOp());
CmpInst::Predicate MainP = MainCI->getPredicate();
CmpInst::Predicate AltP = AltCI->getPredicate();
assert(MainP != AltP &&
>From 9f648ec3c49943252ac1f268552a0723331ad414 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Mon, 9 Dec 2024 17:08:54 -0800
Subject: [PATCH 6/7] apply comment
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 5fc09dddc7d27e..d42e8511970eb9 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -8145,14 +8145,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// a load), in which case peek through to include it in the tree, without
// ballooning over-budget.
if (Depth >= RecursionMaxDepth &&
- !(S.getMainOp() && isa<Instruction>(S.getMainOp()) && !S.isAltShuffle() &&
- VL.size() >= 4 &&
+ !(S.getMainOp() && !S.isAltShuffle() && VL.size() >= 4 &&
(match(S.getMainOp(), m_Load(m_Value())) ||
all_of(VL, [&S](const Value *I) {
return match(I,
m_OneUse(m_ZExtOrSExt(m_OneUse(m_Load(m_Value()))))) &&
cast<Instruction>(I)->getOpcode() ==
- cast<Instruction>(S.getMainOp())->getOpcode();
+ S.getMainOp()->getOpcode();
})))) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
if (TryToFindDuplicates(S))
>From 2b79051d559d37c54d5b5eb3604179ab894fe31f Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Mon, 9 Dec 2024 17:52:00 -0800
Subject: [PATCH 7/7] InstructionsState getOpValue can be replaced by getMainOp
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 51 +++++++++----------
1 file changed, 23 insertions(+), 28 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index d42e8511970eb9..6969ffb3550094 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -808,16 +808,11 @@ namespace {
/// Main data required for vectorization of instructions.
class InstructionsState {
- /// The very first instruction in the list with the main opcode.
- Value *OpValue = nullptr;
-
- /// The main/alternate instruction.
+ /// The main/alternate instruction. MainOp is also VL0.
Instruction *MainOp = nullptr;
Instruction *AltOp = nullptr;
public:
- Value *getOpValue() const { return OpValue; }
-
Instruction *getMainOp() const { return MainOp; }
Instruction *getAltOp() const { return AltOp; }
@@ -840,9 +835,9 @@ class InstructionsState {
}
InstructionsState() = delete;
- InstructionsState(Value *OpValue, Instruction *MainOp, Instruction *AltOp)
- : OpValue(OpValue), MainOp(MainOp), AltOp(AltOp) {}
- static InstructionsState invalid() { return {nullptr, nullptr, nullptr}; }
+ InstructionsState(Instruction *MainOp, Instruction *AltOp)
+ : MainOp(MainOp), AltOp(AltOp) {}
+ static InstructionsState invalid() { return {nullptr, nullptr}; }
};
} // end anonymous namespace
@@ -1080,7 +1075,7 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
return InstructionsState::invalid();
}
- return InstructionsState(V, cast<Instruction>(V),
+ return InstructionsState(cast<Instruction>(V),
cast<Instruction>(VL[AltIndex]));
}
@@ -7551,7 +7546,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
unsigned ShuffleOrOp =
S.isAltShuffle() ? (unsigned)Instruction::ShuffleVector : S.getOpcode();
- auto *VL0 = cast<Instruction>(S.getOpValue());
+ Instruction *VL0 = S.getMainOp();
switch (ShuffleOrOp) {
case Instruction::PHI: {
// Too many operands - gather, most probably won't be vectorized.
@@ -8085,11 +8080,11 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Check if this is a duplicate of another entry.
if (S.getOpcode()) {
- if (TreeEntry *E = getTreeEntry(S.getOpValue())) {
- LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.getOpValue()
+ if (TreeEntry *E = getTreeEntry(S.getMainOp())) {
+ LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.getMainOp()
<< ".\n");
if (GatheredLoadsEntriesFirst.has_value() || !E->isSame(VL)) {
- auto It = MultiNodeScalars.find(S.getOpValue());
+ auto It = MultiNodeScalars.find(S.getMainOp());
if (It != MultiNodeScalars.end()) {
auto *TEIt = find_if(It->getSecond(),
[&](TreeEntry *ME) { return ME->isSame(VL); });
@@ -8102,7 +8097,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
}
if (!E) {
- if (!doesNotNeedToBeScheduled(S.getOpValue())) {
+ if (!doesNotNeedToBeScheduled(S.getMainOp())) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
if (TryToFindDuplicates(S))
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
@@ -8110,8 +8105,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
return;
}
SmallPtrSet<const TreeEntry *, 4> Nodes;
- Nodes.insert(getTreeEntry(S.getOpValue()));
- for (const TreeEntry *E : MultiNodeScalars.lookup(S.getOpValue()))
+ Nodes.insert(getTreeEntry(S.getMainOp()));
+ for (const TreeEntry *E : MultiNodeScalars.lookup(S.getMainOp()))
Nodes.insert(E);
SmallPtrSet<Value *, 8> Values(VL.begin(), VL.end());
if (any_of(Nodes, [&](const TreeEntry *E) {
@@ -8134,7 +8129,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// used to properly draw the graph rather than for the actual
// vectorization.
E->UserTreeIndices.push_back(UserTreeIdx);
- LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.getOpValue()
+ LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.getMainOp()
<< ".\n");
return;
}
@@ -8163,7 +8158,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Don't handle scalable vectors
if (S.getOpcode() == Instruction::ExtractElement &&
isa<ScalableVectorType>(
- cast<ExtractElementInst>(S.getOpValue())->getVectorOperandType())) {
+ cast<ExtractElementInst>(S.getMainOp())->getVectorOperandType())) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
if (TryToFindDuplicates(S))
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
@@ -8259,7 +8254,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
bool AreAllSameInsts = AreAllSameBlock || AreScatterAllGEPSameBlock;
if (!AreAllSameInsts || (!S.getOpcode() && allConstant(VL)) || isSplat(VL) ||
(isa_and_present<InsertElementInst, ExtractValueInst, ExtractElementInst>(
- S.getOpValue()) &&
+ S.getMainOp()) &&
!all_of(VL, isVectorLikeInstWithConstOps)) ||
NotProfitableForVectorization(VL)) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O, small shuffle. \n");
@@ -8326,7 +8321,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Check that all of the users of the scalars that we want to vectorize are
// schedulable.
- auto *VL0 = cast<Instruction>(S.getOpValue());
+ Instruction *VL0 = S.getMainOp();
BB = VL0->getParent();
if (S.getMainOp() &&
@@ -14495,10 +14490,10 @@ BoUpSLP::TreeEntry *BoUpSLP::getMatchedVectorizedOperand(const TreeEntry *E,
VE->isSame(TE->Scalars);
}));
};
- TreeEntry *VE = getTreeEntry(S.getOpValue());
+ TreeEntry *VE = getTreeEntry(S.getMainOp());
if (VE && CheckSameVE(VE))
return VE;
- auto It = MultiNodeScalars.find(S.getOpValue());
+ auto It = MultiNodeScalars.find(S.getMainOp());
if (It != MultiNodeScalars.end()) {
auto *I = find_if(It->getSecond(), [&](const TreeEntry *TE) {
return TE != VE && CheckSameVE(TE);
@@ -16985,13 +16980,13 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
const InstructionsState &S) {
// No need to schedule PHIs, insertelement, extractelement and extractvalue
// instructions.
- if (isa<PHINode>(S.getOpValue()) ||
- isVectorLikeInstWithConstOps(S.getOpValue()) || doesNotNeedToSchedule(VL))
+ if (isa<PHINode>(S.getMainOp()) ||
+ isVectorLikeInstWithConstOps(S.getMainOp()) || doesNotNeedToSchedule(VL))
return nullptr;
// Initialize the instruction bundle.
Instruction *OldScheduleEnd = ScheduleEnd;
- LLVM_DEBUG(dbgs() << "SLP: bundle: " << *S.getOpValue() << "\n");
+ LLVM_DEBUG(dbgs() << "SLP: bundle: " << *S.getMainOp() << "\n");
auto TryScheduleBundleImpl = [this, OldScheduleEnd, SLP](bool ReSchedule,
ScheduleData *Bundle) {
@@ -17072,7 +17067,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
auto *Bundle = buildBundle(VL);
TryScheduleBundleImpl(ReSchedule, Bundle);
if (!Bundle->isReady()) {
- cancelScheduling(VL, S.getOpValue());
+ cancelScheduling(VL, S.getMainOp());
return std::nullopt;
}
return Bundle;
@@ -18988,7 +18983,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
if (!S.getOpcode())
return false;
- auto *I0 = cast<Instruction>(S.getOpValue());
+ Instruction *I0 = S.getMainOp();
// Make sure invalid types (including vector type) are rejected before
// determining vectorization factor for scalar instructions.
for (Value *V : VL) {
More information about the llvm-commits
mailing list