[llvm] [SLP] NFC. Use InstructionsState::valid if users just want to know whether VL has same opcode. (PR #120217)
Han-Kuan Chen via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 26 21:54:31 PST 2024
https://github.com/HanKuanChen updated https://github.com/llvm/llvm-project/pull/120217
>From 5624823cb9b7e7a14c84f3991eeeb24f0f97ebe6 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Tue, 17 Dec 2024 01:37:04 -0800
Subject: [PATCH 1/7] [SLP] NFC. Use getMainOp if users just want to know
whether VL has same opcode.
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 27 +++++++++----------
1 file changed, 12 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index d967813075bb9f..8a6958c3541bd7 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -868,8 +868,8 @@ static bool areCompatibleCmpOps(Value *BaseOp0, Value *BaseOp1, Value *Op0,
(!isa<Instruction>(BaseOp0) && !isa<Instruction>(Op0) &&
!isa<Instruction>(BaseOp1) && !isa<Instruction>(Op1)) ||
BaseOp0 == Op0 || BaseOp1 == Op1 ||
- getSameOpcode({BaseOp0, Op0}, TLI).getOpcode() ||
- getSameOpcode({BaseOp1, Op1}, TLI).getOpcode();
+ getSameOpcode({BaseOp0, Op0}, TLI).getMainOp() ||
+ getSameOpcode({BaseOp1, Op1}, TLI).getMainOp();
}
/// \returns true if a compare instruction \p CI has similar "look" and
@@ -2380,7 +2380,7 @@ class BoUpSLP {
// Use Boyer-Moore majority voting for finding the majority opcode and
// the number of times it occurs.
if (auto *I = dyn_cast<Instruction>(OpData.V)) {
- if (!OpcodeI || !getSameOpcode({OpcodeI, I}, TLI).getOpcode() ||
+ if (!OpcodeI || !getSameOpcode({OpcodeI, I}, TLI).getMainOp() ||
I->getParent() != Parent) {
if (NumOpsWithSameOpcodeParent == 0) {
NumOpsWithSameOpcodeParent = 1;
@@ -2500,7 +2500,7 @@ class BoUpSLP {
// next lane does not build same opcode sequence.
(Lns == 2 &&
!getSameOpcode({Op, getValue((OpI + 1) % OpE, Ln)}, TLI)
- .getOpcode() &&
+ .getMainOp() &&
isa<Constant>(Data.V)))) ||
// 3. The operand in the current lane is loop invariant (can be
// hoisted out) and another operand is also a loop invariant
@@ -2509,7 +2509,7 @@ class BoUpSLP {
// FIXME: need to teach the cost model about this case for better
// estimation.
(IsInvariant && !isa<Constant>(Data.V) &&
- !getSameOpcode({Op, Data.V}, TLI).getOpcode() &&
+ !getSameOpcode({Op, Data.V}, TLI).getMainOp() &&
L->isLoopInvariant(Data.V))) {
FoundCandidate = true;
Data.IsUsed = Data.V == Op;
@@ -2539,7 +2539,7 @@ class BoUpSLP {
return true;
Value *OpILn = getValue(OpI, Ln);
return (L && L->isLoopInvariant(OpILn)) ||
- (getSameOpcode({Op, OpILn}, TLI).getOpcode() &&
+ (getSameOpcode({Op, OpILn}, TLI).getMainOp() &&
allSameBlock({Op, OpILn}));
}))
return true;
@@ -4766,7 +4766,7 @@ static bool arePointersCompatible(Value *Ptr1, Value *Ptr2,
!CompareOpcodes ||
(GEP1 && GEP2 &&
getSameOpcode({GEP1->getOperand(1), GEP2->getOperand(1)}, TLI)
- .getOpcode()));
+ .getMainOp()));
}
/// Calculates minimal alignment as a common alignment.
@@ -13223,7 +13223,7 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
Value *In1 = PHI1->getIncomingValue(I);
if (isConstant(In) && isConstant(In1))
continue;
- if (!getSameOpcode({In, In1}, *TLI).getOpcode())
+ if (!getSameOpcode({In, In1}, *TLI).getMainOp())
return false;
if (cast<Instruction>(In)->getParent() !=
cast<Instruction>(In1)->getParent())
@@ -13251,7 +13251,7 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
if (It != UsedValuesEntry.end())
UsedInSameVTE = It->second == UsedValuesEntry.find(V)->second;
return V != V1 && MightBeIgnored(V1) && !UsedInSameVTE &&
- getSameOpcode({V, V1}, *TLI).getOpcode() &&
+ getSameOpcode({V, V1}, *TLI).getMainOp() &&
cast<Instruction>(V)->getParent() ==
cast<Instruction>(V1)->getParent() &&
(!isa<PHINode>(V1) || AreCompatiblePHIs(V, V1));
@@ -21346,8 +21346,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
return false;
if (I1->getParent() != I2->getParent())
return false;
- InstructionsState S = getSameOpcode({I1, I2}, *TLI);
- if (S.getOpcode())
+ if (getSameOpcode({I1, I2}, *TLI).getMainOp())
continue;
return false;
}
@@ -21701,8 +21700,7 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
"Different nodes should have different DFS numbers");
if (NodeI1 != NodeI2)
return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
- InstructionsState S = getSameOpcode({I1, I2}, *TLI);
- if (S.getOpcode())
+ if (getSameOpcode({I1, I2}, *TLI).getMainOp())
return false;
return I1->getOpcode() < I2->getOpcode();
}
@@ -21728,8 +21726,7 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
if (auto *I2 = dyn_cast<Instruction>(V2->getValueOperand())) {
if (I1->getParent() != I2->getParent())
return false;
- InstructionsState S = getSameOpcode({I1, I2}, *TLI);
- return S.getOpcode() > 0;
+ return getSameOpcode({I1, I2}, *TLI).getMainOp() != nullptr;
}
if (isa<Constant>(V1->getValueOperand()) &&
isa<Constant>(V2->getValueOperand()))
>From 3b885f24b0287a0e08642fc9502ec3e438b24faf Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Tue, 17 Dec 2024 22:30:03 -0800
Subject: [PATCH 2/7] add InstructionsState::valid
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 28 ++++++++++---------
1 file changed, 15 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8a6958c3541bd7..3c8b7dca86b62e 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -836,6 +836,8 @@ class InstructionsState {
return getOpcode() == CheckedOpcode || getAltOpcode() == CheckedOpcode;
}
+ bool valid() const { return MainOp != nullptr; }
+
InstructionsState() = delete;
InstructionsState(Instruction *MainOp, Instruction *AltOp)
: MainOp(MainOp), AltOp(AltOp) {}
@@ -868,8 +870,8 @@ static bool areCompatibleCmpOps(Value *BaseOp0, Value *BaseOp1, Value *Op0,
(!isa<Instruction>(BaseOp0) && !isa<Instruction>(Op0) &&
!isa<Instruction>(BaseOp1) && !isa<Instruction>(Op1)) ||
BaseOp0 == Op0 || BaseOp1 == Op1 ||
- getSameOpcode({BaseOp0, Op0}, TLI).getMainOp() ||
- getSameOpcode({BaseOp1, Op1}, TLI).getMainOp();
+ getSameOpcode({BaseOp0, Op0}, TLI).valid() ||
+ getSameOpcode({BaseOp1, Op1}, TLI).valid();
}
/// \returns true if a compare instruction \p CI has similar "look" and
@@ -2380,7 +2382,7 @@ class BoUpSLP {
// Use Boyer-Moore majority voting for finding the majority opcode and
// the number of times it occurs.
if (auto *I = dyn_cast<Instruction>(OpData.V)) {
- if (!OpcodeI || !getSameOpcode({OpcodeI, I}, TLI).getMainOp() ||
+ if (!OpcodeI || !getSameOpcode({OpcodeI, I}, TLI).valid() ||
I->getParent() != Parent) {
if (NumOpsWithSameOpcodeParent == 0) {
NumOpsWithSameOpcodeParent = 1;
@@ -2500,7 +2502,7 @@ class BoUpSLP {
// next lane does not build same opcode sequence.
(Lns == 2 &&
!getSameOpcode({Op, getValue((OpI + 1) % OpE, Ln)}, TLI)
- .getMainOp() &&
+ .valid() &&
isa<Constant>(Data.V)))) ||
// 3. The operand in the current lane is loop invariant (can be
// hoisted out) and another operand is also a loop invariant
@@ -2509,7 +2511,7 @@ class BoUpSLP {
// FIXME: need to teach the cost model about this case for better
// estimation.
(IsInvariant && !isa<Constant>(Data.V) &&
- !getSameOpcode({Op, Data.V}, TLI).getMainOp() &&
+ !getSameOpcode({Op, Data.V}, TLI).valid() &&
L->isLoopInvariant(Data.V))) {
FoundCandidate = true;
Data.IsUsed = Data.V == Op;
@@ -2539,7 +2541,7 @@ class BoUpSLP {
return true;
Value *OpILn = getValue(OpI, Ln);
return (L && L->isLoopInvariant(OpILn)) ||
- (getSameOpcode({Op, OpILn}, TLI).getMainOp() &&
+ (getSameOpcode({Op, OpILn}, TLI).valid() &&
allSameBlock({Op, OpILn}));
}))
return true;
@@ -4766,7 +4768,7 @@ static bool arePointersCompatible(Value *Ptr1, Value *Ptr2,
!CompareOpcodes ||
(GEP1 && GEP2 &&
getSameOpcode({GEP1->getOperand(1), GEP2->getOperand(1)}, TLI)
- .getMainOp()));
+ .valid()));
}
/// Calculates minimal alignment as a common alignment.
@@ -7488,7 +7490,7 @@ bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S,
[&](ArrayRef<Value *> Op) {
if (allConstant(Op) ||
(!isSplat(Op) && allSameBlock(Op) && allSameType(Op) &&
- getSameOpcode(Op, *TLI).getMainOp()))
+ getSameOpcode(Op, *TLI).valid()))
return false;
DenseMap<Value *, unsigned> Uniques;
for (Value *V : Op) {
@@ -13223,7 +13225,7 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
Value *In1 = PHI1->getIncomingValue(I);
if (isConstant(In) && isConstant(In1))
continue;
- if (!getSameOpcode({In, In1}, *TLI).getMainOp())
+ if (!getSameOpcode({In, In1}, *TLI).valid())
return false;
if (cast<Instruction>(In)->getParent() !=
cast<Instruction>(In1)->getParent())
@@ -13251,7 +13253,7 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
if (It != UsedValuesEntry.end())
UsedInSameVTE = It->second == UsedValuesEntry.find(V)->second;
return V != V1 && MightBeIgnored(V1) && !UsedInSameVTE &&
- getSameOpcode({V, V1}, *TLI).getMainOp() &&
+ getSameOpcode({V, V1}, *TLI).valid() &&
cast<Instruction>(V)->getParent() ==
cast<Instruction>(V1)->getParent() &&
(!isa<PHINode>(V1) || AreCompatiblePHIs(V, V1));
@@ -21346,7 +21348,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
return false;
if (I1->getParent() != I2->getParent())
return false;
- if (getSameOpcode({I1, I2}, *TLI).getMainOp())
+ if (getSameOpcode({I1, I2}, *TLI).valid())
continue;
return false;
}
@@ -21700,7 +21702,7 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
"Different nodes should have different DFS numbers");
if (NodeI1 != NodeI2)
return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
- if (getSameOpcode({I1, I2}, *TLI).getMainOp())
+ if (getSameOpcode({I1, I2}, *TLI).valid())
return false;
return I1->getOpcode() < I2->getOpcode();
}
@@ -21726,7 +21728,7 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
if (auto *I2 = dyn_cast<Instruction>(V2->getValueOperand())) {
if (I1->getParent() != I2->getParent())
return false;
- return getSameOpcode({I1, I2}, *TLI).getMainOp() != nullptr;
+ return getSameOpcode({I1, I2}, *TLI).valid();
}
if (isa<Constant>(V1->getValueOperand()) &&
isa<Constant>(V2->getValueOperand()))
>From 533b3310efc1e8851addedcf4f91ce8f3f5243d2 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Mon, 23 Dec 2024 17:14:48 -0800
Subject: [PATCH 3/7] apply comment
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 31 ++++++++++---------
1 file changed, 16 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 3c8b7dca86b62e..581682274ff562 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -836,7 +836,10 @@ class InstructionsState {
return getOpcode() == CheckedOpcode || getAltOpcode() == CheckedOpcode;
}
- bool valid() const { return MainOp != nullptr; }
+ /// Checks if the current state is valid, i.e. has non-null MainOp
+ bool valid() const { return MainOp; }
+
+ explicit operator bool() const { return valid(); }
InstructionsState() = delete;
InstructionsState(Instruction *MainOp, Instruction *AltOp)
@@ -870,8 +873,8 @@ static bool areCompatibleCmpOps(Value *BaseOp0, Value *BaseOp1, Value *Op0,
(!isa<Instruction>(BaseOp0) && !isa<Instruction>(Op0) &&
!isa<Instruction>(BaseOp1) && !isa<Instruction>(Op1)) ||
BaseOp0 == Op0 || BaseOp1 == Op1 ||
- getSameOpcode({BaseOp0, Op0}, TLI).valid() ||
- getSameOpcode({BaseOp1, Op1}, TLI).valid();
+ getSameOpcode({BaseOp0, Op0}, TLI) ||
+ getSameOpcode({BaseOp1, Op1}, TLI);
}
/// \returns true if a compare instruction \p CI has similar "look" and
@@ -2382,7 +2385,7 @@ class BoUpSLP {
// Use Boyer-Moore majority voting for finding the majority opcode and
// the number of times it occurs.
if (auto *I = dyn_cast<Instruction>(OpData.V)) {
- if (!OpcodeI || !getSameOpcode({OpcodeI, I}, TLI).valid() ||
+ if (!OpcodeI || !getSameOpcode({OpcodeI, I}, TLI) ||
I->getParent() != Parent) {
if (NumOpsWithSameOpcodeParent == 0) {
NumOpsWithSameOpcodeParent = 1;
@@ -2501,8 +2504,7 @@ class BoUpSLP {
// 2.1. If we have only 2 lanes, need to check that value in the
// next lane does not build same opcode sequence.
(Lns == 2 &&
- !getSameOpcode({Op, getValue((OpI + 1) % OpE, Ln)}, TLI)
- .valid() &&
+ !getSameOpcode({Op, getValue((OpI + 1) % OpE, Ln)}, TLI) &&
isa<Constant>(Data.V)))) ||
// 3. The operand in the current lane is loop invariant (can be
// hoisted out) and another operand is also a loop invariant
@@ -2511,7 +2513,7 @@ class BoUpSLP {
// FIXME: need to teach the cost model about this case for better
// estimation.
(IsInvariant && !isa<Constant>(Data.V) &&
- !getSameOpcode({Op, Data.V}, TLI).valid() &&
+ !getSameOpcode({Op, Data.V}, TLI) &&
L->isLoopInvariant(Data.V))) {
FoundCandidate = true;
Data.IsUsed = Data.V == Op;
@@ -2541,7 +2543,7 @@ class BoUpSLP {
return true;
Value *OpILn = getValue(OpI, Ln);
return (L && L->isLoopInvariant(OpILn)) ||
- (getSameOpcode({Op, OpILn}, TLI).valid() &&
+ (getSameOpcode({Op, OpILn}, TLI) &&
allSameBlock({Op, OpILn}));
}))
return true;
@@ -4767,8 +4769,7 @@ static bool arePointersCompatible(Value *Ptr1, Value *Ptr2,
(!GEP2 || isConstant(GEP2->getOperand(1)))) ||
!CompareOpcodes ||
(GEP1 && GEP2 &&
- getSameOpcode({GEP1->getOperand(1), GEP2->getOperand(1)}, TLI)
- .valid()));
+ getSameOpcode({GEP1->getOperand(1), GEP2->getOperand(1)}, TLI)));
}
/// Calculates minimal alignment as a common alignment.
@@ -7490,7 +7491,7 @@ bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S,
[&](ArrayRef<Value *> Op) {
if (allConstant(Op) ||
(!isSplat(Op) && allSameBlock(Op) && allSameType(Op) &&
- getSameOpcode(Op, *TLI).valid()))
+ getSameOpcode(Op, *TLI)))
return false;
DenseMap<Value *, unsigned> Uniques;
for (Value *V : Op) {
@@ -13225,7 +13226,7 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
Value *In1 = PHI1->getIncomingValue(I);
if (isConstant(In) && isConstant(In1))
continue;
- if (!getSameOpcode({In, In1}, *TLI).valid())
+ if (!getSameOpcode({In, In1}, *TLI))
return false;
if (cast<Instruction>(In)->getParent() !=
cast<Instruction>(In1)->getParent())
@@ -13253,7 +13254,7 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
if (It != UsedValuesEntry.end())
UsedInSameVTE = It->second == UsedValuesEntry.find(V)->second;
return V != V1 && MightBeIgnored(V1) && !UsedInSameVTE &&
- getSameOpcode({V, V1}, *TLI).valid() &&
+ getSameOpcode({V, V1}, *TLI) &&
cast<Instruction>(V)->getParent() ==
cast<Instruction>(V1)->getParent() &&
(!isa<PHINode>(V1) || AreCompatiblePHIs(V, V1));
@@ -21348,7 +21349,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
return false;
if (I1->getParent() != I2->getParent())
return false;
- if (getSameOpcode({I1, I2}, *TLI).valid())
+ if (getSameOpcode({I1, I2}, *TLI))
continue;
return false;
}
@@ -21702,7 +21703,7 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
"Different nodes should have different DFS numbers");
if (NodeI1 != NodeI2)
return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
- if (getSameOpcode({I1, I2}, *TLI).valid())
+ if (getSameOpcode({I1, I2}, *TLI))
return false;
return I1->getOpcode() < I2->getOpcode();
}
>From 1f60b793d8b9653089947503a6145943a8b1abfe Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Tue, 17 Dec 2024 02:15:42 -0800
Subject: [PATCH 4/7] [SLP] NFC. Use InstructionsState::getOpcode only when
necessary.
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 86 +++++++++----------
1 file changed, 42 insertions(+), 44 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 581682274ff562..4d0f4ab491880f 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -821,7 +821,8 @@ class InstructionsState {
/// The main/alternate opcodes for the list of instructions.
unsigned getOpcode() const {
- return MainOp ? MainOp->getOpcode() : 0;
+ assert(MainOp && "InstructionsState is invalid.");
+ return MainOp->getOpcode();
}
unsigned getAltOpcode() const {
@@ -1850,7 +1851,7 @@ class BoUpSLP {
InstructionsState S = getSameOpcode(Ops, TLI);
// Note: Only consider instructions with <= 2 operands to avoid
// complexity explosion.
- if (S.getOpcode() &&
+ if (S &&
(S.getMainOp()->getNumOperands() <= 2 || !MainAltOps.empty() ||
!S.isAltShuffle()) &&
all_of(Ops, [&S](Value *V) {
@@ -2700,7 +2701,7 @@ class BoUpSLP {
OperandData &AltOp = getData(OpIdx, Lane);
InstructionsState OpS =
getSameOpcode({MainAltOps[OpIdx].front(), AltOp.V}, TLI);
- if (OpS.getOpcode() && OpS.isAltShuffle())
+ if (OpS && OpS.isAltShuffle())
MainAltOps[OpIdx].push_back(AltOp.V);
}
}
@@ -3595,7 +3596,7 @@ class BoUpSLP {
"Need to vectorize gather entry?");
// Gathered loads still gathered? Do not create entry, use the original one.
if (GatheredLoadsEntriesFirst.has_value() &&
- EntryState == TreeEntry::NeedToGather &&
+ EntryState == TreeEntry::NeedToGather && S &&
S.getOpcode() == Instruction::Load && UserTreeIdx.EdgeIdx == UINT_MAX &&
!UserTreeIdx.UserTE)
return nullptr;
@@ -8062,15 +8063,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Don't go into catchswitch blocks, which can happen with PHIs.
// Such blocks can only have PHIs and the catchswitch. There is no
// place to insert a shuffle if we need to, so just avoid that issue.
- if (S.getMainOp() &&
- isa<CatchSwitchInst>(S.getMainOp()->getParent()->getTerminator())) {
+ if (S && isa<CatchSwitchInst>(S.getMainOp()->getParent()->getTerminator())) {
LLVM_DEBUG(dbgs() << "SLP: bundle in catchswitch block.\n");
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
return;
}
// Check if this is a duplicate of another entry.
- if (S.getOpcode()) {
+ if (S) {
if (TreeEntry *E = getTreeEntry(S.getMainOp())) {
LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.getMainOp()
<< ".\n");
@@ -8131,13 +8131,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// a load), in which case peek through to include it in the tree, without
// ballooning over-budget.
if (Depth >= RecursionMaxDepth &&
- !(S.getMainOp() && !S.isAltShuffle() && VL.size() >= 4 &&
+ !(S && !S.isAltShuffle() && VL.size() >= 4 &&
(match(S.getMainOp(), m_Load(m_Value())) ||
all_of(VL, [&S](const Value *I) {
return match(I,
m_OneUse(m_ZExtOrSExt(m_OneUse(m_Load(m_Value()))))) &&
- cast<Instruction>(I)->getOpcode() ==
- S.getMainOp()->getOpcode();
+ cast<Instruction>(I)->getOpcode() == S.getOpcode();
})))) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
if (TryToFindDuplicates(S))
@@ -8147,15 +8146,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
// Don't handle scalable vectors
- if (S.getOpcode() == Instruction::ExtractElement &&
- isa<ScalableVectorType>(
- cast<ExtractElementInst>(S.getMainOp())->getVectorOperandType())) {
- LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
- if (TryToFindDuplicates(S))
- newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndices);
- return;
- }
+ if (auto *EE = dyn_cast_if_present<ExtractElementInst>(S.getMainOp()))
+ if (isa<ScalableVectorType>(EE->getVectorOperandType())) {
+ LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
+ if (TryToFindDuplicates(S))
+ newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndices);
+ return;
+ }
// Don't handle vectors.
if (!SLPReVec && getValueType(VL.front())->isVectorTy()) {
@@ -8171,7 +8169,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// vectorize.
auto &&NotProfitableForVectorization = [&S, this,
Depth](ArrayRef<Value *> VL) {
- if (!S.getOpcode() || !S.isAltShuffle() || VL.size() > 2)
+ if (!S || !S.isAltShuffle() || VL.size() > 2)
return false;
if (VectorizableTree.size() < MinTreeSize)
return false;
@@ -8226,7 +8224,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
bool IsScatterVectorizeUserTE =
UserTreeIdx.UserTE &&
UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize;
- bool AreAllSameBlock = S.getOpcode() && allSameBlock(VL);
+ bool AreAllSameBlock = S && allSameBlock(VL);
bool AreScatterAllGEPSameBlock =
(IsScatterVectorizeUserTE && VL.front()->getType()->isPointerTy() &&
VL.size() > 2 &&
@@ -8243,7 +8241,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
sortPtrAccesses(VL, UserTreeIdx.UserTE->getMainOp()->getType(), *DL, *SE,
SortedIndices));
bool AreAllSameInsts = AreAllSameBlock || AreScatterAllGEPSameBlock;
- if (!AreAllSameInsts || (!S.getOpcode() && allConstant(VL)) || isSplat(VL) ||
+ if (!AreAllSameInsts || (!S && allConstant(VL)) || isSplat(VL) ||
(isa_and_present<InsertElementInst, ExtractValueInst, ExtractElementInst>(
S.getMainOp()) &&
!all_of(VL, isVectorLikeInstWithConstOps)) ||
@@ -8256,7 +8254,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
// Don't vectorize ephemeral values.
- if (S.getOpcode() && !EphValues.empty()) {
+ if (S && !EphValues.empty()) {
for (Value *V : VL) {
if (EphValues.count(V)) {
LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
@@ -8315,7 +8313,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
Instruction *VL0 = S.getMainOp();
BB = VL0->getParent();
- if (S.getMainOp() &&
+ if (S &&
(BB->isEHPad() || isa_and_nonnull<UnreachableInst>(BB->getTerminator()) ||
!DT->isReachableFromEntry(BB))) {
// Don't go into unreachable blocks. They may contain instructions with
@@ -8369,8 +8367,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
LLVM_DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n");
- unsigned ShuffleOrOp = S.isAltShuffle() ?
- (unsigned) Instruction::ShuffleVector : S.getOpcode();
+ unsigned ShuffleOrOp =
+ S.isAltShuffle() ? (unsigned)Instruction::ShuffleVector : S.getOpcode();
auto CreateOperandNodes = [&](TreeEntry *TE, const auto &Operands) {
// Postpone PHI nodes creation
SmallVector<unsigned> PHIOps;
@@ -8379,7 +8377,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (Op.empty())
continue;
InstructionsState S = getSameOpcode(Op, *TLI);
- if (S.getOpcode() != Instruction::PHI || S.isAltShuffle())
+ if ((!S || S.getOpcode() != Instruction::PHI) || S.isAltShuffle())
buildTree_rec(Op, Depth + 1, {TE, I});
else
PHIOps.push_back(I);
@@ -9733,7 +9731,7 @@ void BoUpSLP::transformNodes() {
if (IsSplat)
continue;
InstructionsState S = getSameOpcode(Slice, *TLI);
- if (!S.getOpcode() || S.isAltShuffle() || !allSameBlock(Slice) ||
+ if (!S || S.isAltShuffle() || !allSameBlock(Slice) ||
(S.getOpcode() == Instruction::Load &&
areKnownNonVectorizableLoads(Slice)) ||
(S.getOpcode() != Instruction::Load && !has_single_bit(VF)))
@@ -11050,7 +11048,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
if (const TreeEntry *OpTE = getTreeEntry(V))
return getCastContextHint(*OpTE);
InstructionsState SrcState = getSameOpcode(E->getOperand(0), *TLI);
- if (SrcState.getOpcode() == Instruction::Load && !SrcState.isAltShuffle())
+ if (SrcState && SrcState.getOpcode() == Instruction::Load &&
+ !SrcState.isAltShuffle())
return TTI::CastContextHint::GatherScatter;
return TTI::CastContextHint::None;
};
@@ -14397,12 +14396,12 @@ BoUpSLP::TreeEntry *BoUpSLP::getMatchedVectorizedOperand(const TreeEntry *E,
ArrayRef<Value *> VL = E->getOperand(NodeIdx);
InstructionsState S = getSameOpcode(VL, *TLI);
// Special processing for GEPs bundle, which may include non-gep values.
- if (!S.getOpcode() && VL.front()->getType()->isPointerTy()) {
+ if (!S && VL.front()->getType()->isPointerTy()) {
const auto *It = find_if(VL, IsaPred<GetElementPtrInst>);
if (It != VL.end())
S = getSameOpcode(*It, *TLI);
}
- if (!S.getOpcode())
+ if (!S)
return nullptr;
auto CheckSameVE = [&](const TreeEntry *VE) {
return VE->isSame(VL) &&
@@ -15062,7 +15061,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
auto *VecTy = getWidenedType(ScalarTy, E->Scalars.size());
if (E->isGather()) {
// Set insert point for non-reduction initial nodes.
- if (E->getMainOp() && E->Idx == 0 && !UserIgnoreList)
+ if (E->getMainOp() != nullptr && E->Idx == 0 && !UserIgnoreList)
setInsertPointAfterBundle(E);
Value *Vec = createBuildVector(E, ScalarTy, PostponedPHIs);
E->VectorizedValue = Vec;
@@ -18379,8 +18378,7 @@ SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
hasFullVectorsOrPowerOf2(*TTI, ValOps.front()->getType(),
ValOps.size()) ||
(VectorizeNonPowerOf2 && has_single_bit(ValOps.size() + 1));
- if ((!IsAllowedSize && S.getOpcode() &&
- S.getOpcode() != Instruction::Load &&
+ if ((!IsAllowedSize && S && S.getOpcode() != Instruction::Load &&
(!S.getMainOp()->isSafeToRemove() ||
any_of(ValOps.getArrayRef(),
[&](Value *V) {
@@ -18390,8 +18388,8 @@ SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
return !Stores.contains(U);
}));
}))) ||
- (ValOps.size() > Chain.size() / 2 && !S.getOpcode())) {
- Size = (!IsAllowedSize && S.getOpcode()) ? 1 : 2;
+ (ValOps.size() > Chain.size() / 2 && !S)) {
+ Size = (!IsAllowedSize && S) ? 1 : 2;
return false;
}
}
@@ -18414,7 +18412,7 @@ SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
R.computeMinimumValueSizes();
Size = R.getCanonicalGraphSize();
- if (S.getOpcode() == Instruction::Load)
+ if (S && S.getOpcode() == Instruction::Load)
Size = 2; // cut off masked gather small trees
InstructionCost Cost = R.getTreeCost();
@@ -18915,7 +18913,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
// Check that all of the parts are instructions of the same type,
// we permit an alternate opcode via InstructionsState.
InstructionsState S = getSameOpcode(VL, *TLI);
- if (!S.getOpcode())
+ if (!S)
return false;
Instruction *I0 = S.getMainOp();
@@ -19727,16 +19725,16 @@ class HorizontalReduction {
// Also check if the instruction was folded to constant/other value.
auto *Inst = dyn_cast<Instruction>(RdxVal);
if ((Inst && isVectorLikeInstWithConstOps(Inst) &&
- (!S.getOpcode() || !S.isOpcodeOrAlt(Inst))) ||
- (S.getOpcode() && !Inst))
+ (!S || !S.isOpcodeOrAlt(Inst))) ||
+ (S && !Inst))
continue;
Candidates.push_back(RdxVal);
TrackedToOrig.try_emplace(RdxVal, OrigReducedVals[Cnt]);
}
bool ShuffledExtracts = false;
// Try to handle shuffled extractelements.
- if (S.getOpcode() == Instruction::ExtractElement && !S.isAltShuffle() &&
- I + 1 < E) {
+ if (S && S.getOpcode() == Instruction::ExtractElement &&
+ !S.isAltShuffle() && I + 1 < E) {
SmallVector<Value *> CommonCandidates(Candidates);
for (Value *RV : ReducedVals[I + 1]) {
Value *RdxVal = TrackedVals.at(RV);
@@ -21131,7 +21129,7 @@ static bool compareCmp(Value *V, Value *V2, TargetLibraryInfo &TLI,
return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
}
InstructionsState S = getSameOpcode({I1, I2}, TLI);
- if (S.getOpcode() && (IsCompatibility || !S.isAltShuffle()))
+ if (S && (IsCompatibility || !S.isAltShuffle()))
continue;
if (IsCompatibility)
return false;
@@ -21286,7 +21284,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
if (NodeI1 != NodeI2)
return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
InstructionsState S = getSameOpcode({I1, I2}, *TLI);
- if (S.getOpcode() && !S.isAltShuffle())
+ if (S && !S.isAltShuffle())
continue;
return I1->getOpcode() < I2->getOpcode();
}
>From dd65671bb1dbdbabc05be5942117cb1e22dc10dc Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Wed, 25 Dec 2024 01:26:35 -0800
Subject: [PATCH 5/7] apply comment
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 29 ++++++++++---------
1 file changed, 15 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 4d0f4ab491880f..6ea663b6a8e3f8 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -821,16 +821,17 @@ class InstructionsState {
/// The main/alternate opcodes for the list of instructions.
unsigned getOpcode() const {
- assert(MainOp && "InstructionsState is invalid.");
- return MainOp->getOpcode();
+ assert(valid() && "InstructionsState is invalid.");
+ return getMainOp()->getOpcode();
}
unsigned getAltOpcode() const {
- return AltOp ? AltOp->getOpcode() : 0;
+ assert(valid() && "InstructionsState is invalid.");
+ return getAltOp()->getOpcode();
}
/// Some of the instructions in the list have alternate opcodes.
- bool isAltShuffle() const { return AltOp != MainOp; }
+ bool isAltShuffle() const { return getMainOp() != getAltOp(); }
bool isOpcodeOrAlt(Instruction *I) const {
unsigned CheckedOpcode = I->getOpcode();
@@ -838,7 +839,7 @@ class InstructionsState {
}
/// Checks if the current state is valid, i.e. has non-null MainOp
- bool valid() const { return MainOp; }
+ bool valid() const { return getMainOp() && getAltOp(); }
explicit operator bool() const { return valid(); }
@@ -8146,14 +8147,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
// Don't handle scalable vectors
- if (auto *EE = dyn_cast_if_present<ExtractElementInst>(S.getMainOp()))
- if (isa<ScalableVectorType>(EE->getVectorOperandType())) {
- LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
- if (TryToFindDuplicates(S))
- newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndices);
- return;
- }
+ if (auto *EE = dyn_cast_if_present<ExtractElementInst>(S.getMainOp());
+ EE && isa<ScalableVectorType>(EE->getVectorOperandType())) {
+ LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
+ if (TryToFindDuplicates(S))
+ newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndices);
+ return;
+ }
// Don't handle vectors.
if (!SLPReVec && getValueType(VL.front())->isVectorTy()) {
@@ -15061,7 +15062,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
auto *VecTy = getWidenedType(ScalarTy, E->Scalars.size());
if (E->isGather()) {
// Set insert point for non-reduction initial nodes.
- if (E->getMainOp() != nullptr && E->Idx == 0 && !UserIgnoreList)
+ if (E->getMainOp() && E->Idx == 0 && !UserIgnoreList)
setInsertPointAfterBundle(E);
Value *Vec = createBuildVector(E, ScalarTy, PostponedPHIs);
E->VectorizedValue = Vec;
>From 5305e3798c4a381d375684e71fe662de40474e5b Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 26 Dec 2024 10:26:15 -0800
Subject: [PATCH 6/7] fix 0d6cb0ae9d4ff610f729d0fd1bbd27227e6628cf
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 5 -----
1 file changed, 5 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index bea54f0b344750..03fc9157cd4619 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -21879,11 +21879,6 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
"Different nodes should have different DFS numbers");
if (NodeI1 != NodeI2)
return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
-<<<<<<< HEAD
- if (getSameOpcode({I1, I2}, *TLI))
- return false;
-=======
->>>>>>> origin/main
return I1->getOpcode() < I2->getOpcode();
}
return V->getValueOperand()->getValueID() <
>From 1c16e5cc54bcbd24eb51f2609f3a098b27d80b96 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 26 Dec 2024 10:37:35 -0800
Subject: [PATCH 7/7] apply comment
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 03fc9157cd4619..147a85a59efc02 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -8152,8 +8152,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
// Don't handle scalable vectors
- if (auto *EE = dyn_cast_if_present<ExtractElementInst>(S.getMainOp());
- EE && isa<ScalableVectorType>(EE->getVectorOperandType())) {
+ if (S && S.getOpcode() == Instruction::ExtractElement &&
+ isa<ScalableVectorType>(
+ cast<ExtractElementInst>(S.getMainOp())->getVectorOperandType())) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
if (TryToFindDuplicates(S))
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
@@ -8248,7 +8249,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
SortedIndices));
bool AreAllSameInsts = AreAllSameBlock || AreScatterAllGEPSameBlock;
if (!AreAllSameInsts || (!S && allConstant(VL)) || isSplat(VL) ||
- (isa_and_present<InsertElementInst, ExtractValueInst, ExtractElementInst>(
+ (S &&
+ isa<InsertElementInst, ExtractValueInst, ExtractElementInst>(
S.getMainOp()) &&
!all_of(VL, isVectorLikeInstWithConstOps)) ||
NotProfitableForVectorization(VL)) {
More information about the llvm-commits
mailing list