[llvm] [SLP] NFC. Replace TreeEntry::setOperandsInOrder with VLOperands. (PR #113880)
Han-Kuan Chen via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 3 02:45:08 PST 2024
https://github.com/HanKuanChen updated https://github.com/llvm/llvm-project/pull/113880
>From 87b6e09d312b6e8740e7d3dd8da7d5df9829b72f Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 24 Oct 2024 21:31:15 -0700
Subject: [PATCH 1/6] [SLP] NFC. Replace TreeEntry::setOperandsInOrder with
VLOperands.
To reduce repeated code, TreeEntry::setOperandsInOrder will be replaced
by VLOperands.
ArgSize will be provided to make sure other operands will not be
reorderd when VL[0] is IntrinsicInst (because APO is a boolean value).
In addition, BoUpSLP::reorderInputsAccordingToOpcode will also be
removed since it is simple.
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 157 ++++++------------
1 file changed, 50 insertions(+), 107 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index dc0dffd9fcbf81..2f2af91618ca07 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1949,6 +1949,9 @@ class BoUpSLP {
/// A vector of operand vectors.
SmallVector<OperandDataVec, 4> OpsVec;
+ /// When VL[0] is IntrinsicInst, ArgSize is CallBase::arg_size. When VL[0]
+ /// is not IntrinsicInst, ArgSize is User::getNumOperands.
+ unsigned ArgSize;
const TargetLibraryInfo &TLI;
const DataLayout &DL;
@@ -2337,10 +2340,11 @@ class BoUpSLP {
assert((empty() || VL.size() == getNumLanes()) &&
"Expected same number of lanes");
assert(isa<Instruction>(VL[0]) && "Expected instruction");
+ unsigned NumOperands = cast<Instruction>(VL[0])->getNumOperands();
+ // IntrinsicInst::isCommutative returns true if swapping the first "two"
+ // arguments to the intrinsic produces the same result.
constexpr unsigned IntrinsicNumOperands = 2;
- unsigned NumOperands = isa<IntrinsicInst>(VL[0])
- ? IntrinsicNumOperands
- : cast<Instruction>(VL[0])->getNumOperands();
+ ArgSize = isa<IntrinsicInst>(VL[0]) ? IntrinsicNumOperands : NumOperands;
OpsVec.resize(NumOperands);
unsigned NumLanes = VL.size();
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
@@ -2366,7 +2370,7 @@ class BoUpSLP {
}
/// \returns the number of operands.
- unsigned getNumOperands() const { return OpsVec.size(); }
+ unsigned getNumOperands() const { return ArgSize; }
/// \returns the number of lanes.
unsigned getNumLanes() const { return OpsVec[0].size(); }
@@ -2542,7 +2546,8 @@ class BoUpSLP {
ArrayRef<OperandData> Op0 = OpsVec.front();
for (const OperandData &Data : Op0)
UniqueValues.insert(Data.V);
- for (ArrayRef<OperandData> Op : drop_begin(OpsVec, 1)) {
+ for (ArrayRef<OperandData> Op : make_range(
+ OpsVec.begin() + 1, OpsVec.begin() + getNumOperands())) {
if (any_of(Op, [&UniqueValues](const OperandData &Data) {
return !UniqueValues.contains(Data.V);
}))
@@ -3064,13 +3069,6 @@ class BoUpSLP {
SmallVector<SmallVector<std::pair<LoadInst *, int>>>,
8> &GatheredLoads);
- /// Reorder commutative or alt operands to get better probability of
- /// generating vectorized code.
- static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
- SmallVectorImpl<Value *> &Left,
- SmallVectorImpl<Value *> &Right,
- const BoUpSLP &R);
-
/// Helper for `findExternalStoreUsersReorderIndices()`. It iterates over the
/// users of \p TE and collects the stores. It returns the map from the store
/// pointers to the collected stores.
@@ -3265,22 +3263,10 @@ class BoUpSLP {
copy(OpVL, Operands[OpIdx].begin());
}
- /// Set the operands of this bundle in their original order.
- void setOperandsInOrder() {
- assert(Operands.empty() && "Already initialized?");
- auto *I0 = cast<Instruction>(Scalars[0]);
- Operands.resize(I0->getNumOperands());
- unsigned NumLanes = Scalars.size();
- for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
- OpIdx != NumOperands; ++OpIdx) {
- Operands[OpIdx].resize(NumLanes);
- for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
- auto *I = cast<Instruction>(Scalars[Lane]);
- assert(I->getNumOperands() == NumOperands &&
- "Expected same number of operands");
- Operands[OpIdx][Lane] = I->getOperand(OpIdx);
- }
- }
+ /// Set this bundle's operand from \p Ops.
+ void setOperand(const VLOperands &Ops, unsigned NumOperands) {
+ for (unsigned I : seq(NumOperands))
+ setOperand(I, Ops.getVL(I));
}
/// Reorders operands of the node to the given mask \p Mask.
@@ -8329,7 +8315,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
{}, CurrentOrder);
LLVM_DEBUG(dbgs() << "SLP: added inserts bundle.\n");
- TE->setOperandsInOrder();
+ VLOperands Ops(VL, *this);
+ TE->setOperand(Ops, VL0->getNumOperands());
buildTree_rec(TE->getOperand(1), Depth + 1, {TE, 1});
return;
}
@@ -8350,27 +8337,27 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
else
LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
- TE->setOperandsInOrder();
break;
case TreeEntry::StridedVectorize:
// Vectorizing non-consecutive loads with `llvm.masked.gather`.
TE = newTreeEntry(VL, TreeEntry::StridedVectorize, Bundle, S,
UserTreeIdx, ReuseShuffleIndices, CurrentOrder);
- TE->setOperandsInOrder();
LLVM_DEBUG(dbgs() << "SLP: added a vector of strided loads.\n");
break;
case TreeEntry::ScatterVectorize:
// Vectorizing non-consecutive loads with `llvm.masked.gather`.
TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle, S,
UserTreeIdx, ReuseShuffleIndices);
- TE->setOperandsInOrder();
- buildTree_rec(PointerOps, Depth + 1, {TE, 0});
LLVM_DEBUG(dbgs() << "SLP: added a vector of non-consecutive loads.\n");
break;
case TreeEntry::CombinedVectorize:
case TreeEntry::NeedToGather:
llvm_unreachable("Unexpected loads state.");
}
+ VLOperands Ops(VL, *this);
+ TE->setOperand(Ops, VL0->getNumOperands());
+ if (State == TreeEntry::ScatterVectorize)
+ buildTree_rec(PointerOps, Depth + 1, {TE, 0});
return;
}
case Instruction::ZExt:
@@ -8408,8 +8395,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
ReuseShuffleIndices);
LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
- TE->setOperandsInOrder();
- for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
+ VLOperands Ops(VL, *this);
+ TE->setOperand(Ops, VL0->getNumOperands());
+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
if (ShuffleOrOp == Instruction::Trunc) {
ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx);
@@ -8436,12 +8424,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
ValueList Left, Right;
+ VLOperands Ops(VL, *this);
if (cast<CmpInst>(VL0)->isCommutative()) {
// Commutative predicate - collect + sort operands of the instructions
// so that each side is more likely to have the same opcode.
assert(P0 == CmpInst::getSwappedPredicate(P0) &&
"Commutative Predicate mismatch");
- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
+ Ops.reorder();
+ Left = Ops.getVL(0);
+ Right = Ops.getVL(1);
} else {
// Collect operands - commute if it uses the swapped predicate.
for (Value *V : VL) {
@@ -8497,20 +8488,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
ReuseShuffleIndices);
LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
+ VLOperands Ops(VL, *this);
// Sort operands of the instructions so that each side is more likely to
// have the same opcode.
- if (isa<BinaryOperator>(VL0) && isCommutative(VL0)) {
- ValueList Left, Right;
- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
- TE->setOperand(0, Left);
- TE->setOperand(1, Right);
- buildTree_rec(Left, Depth + 1, {TE, 0});
- buildTree_rec(Right, Depth + 1, {TE, 1});
- return;
- }
-
- TE->setOperandsInOrder();
- for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
+ if (isa<BinaryOperator>(VL0) && isCommutative(VL0))
+ Ops.reorder();
+ TE->setOperand(Ops, VL0->getNumOperands());
+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
return;
}
@@ -8575,7 +8559,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
fixupOrderingIndices(CurrentOrder);
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices, CurrentOrder);
- TE->setOperandsInOrder();
+ VLOperands Ops(VL, *this);
+ TE->setOperand(Ops, VL0->getNumOperands());
buildTree_rec(TE->getOperand(0), Depth + 1, {TE, 0});
if (Consecutive)
LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
@@ -8591,46 +8576,18 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices);
+ VLOperands Ops(VL, *this);
// Sort operands of the instructions so that each side is more likely to
// have the same opcode.
- if (isCommutative(VL0)) {
- ValueList Left, Right;
- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
- TE->setOperand(0, Left);
- TE->setOperand(1, Right);
- SmallVector<ValueList> Operands;
- for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
- Operands.emplace_back();
- if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
- continue;
- for (Value *V : VL) {
- auto *CI2 = cast<CallInst>(V);
- Operands.back().push_back(CI2->getArgOperand(I));
- }
- TE->setOperand(I, Operands.back());
- }
- buildTree_rec(Left, Depth + 1, {TE, 0});
- buildTree_rec(Right, Depth + 1, {TE, 1});
- for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
- if (Operands[I - 2].empty())
- continue;
- buildTree_rec(Operands[I - 2], Depth + 1, {TE, I});
- }
- return;
- }
- TE->setOperandsInOrder();
- for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
+ if (isCommutative(VL0))
+ Ops.reorder();
+ TE->setOperand(Ops, VL0->getNumOperands());
+ for (unsigned I : seq<unsigned>(CI->arg_size())) {
// For scalar operands no need to create an entry since no need to
// vectorize it.
if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
continue;
- ValueList Operands;
- // Prepare the operand vector.
- for (Value *V : VL) {
- auto *CI2 = cast<CallInst>(V);
- Operands.push_back(CI2->getArgOperand(I));
- }
- buildTree_rec(Operands, Depth + 1, {TE, I});
+ buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
}
return;
}
@@ -8639,14 +8596,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
ReuseShuffleIndices);
LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
+ VLOperands Ops(VL, *this);
// Reorder operands if reordering would enable vectorization.
auto *CI = dyn_cast<CmpInst>(VL0);
if (isa<BinaryOperator>(VL0) || CI) {
- ValueList Left, Right;
if (!CI || all_of(VL, [](Value *V) {
return cast<CmpInst>(V)->isCommutative();
})) {
- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
+ Ops.reorder();
} else {
auto *MainCI = cast<CmpInst>(S.MainOp);
auto *AltCI = cast<CmpInst>(S.AltOp);
@@ -8654,6 +8611,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
CmpInst::Predicate AltP = AltCI->getPredicate();
assert(MainP != AltP &&
"Expected different main/alternate predicates.");
+ ValueList Left, Right;
// Collect operands - commute if it uses the swapped predicate or
// alternate operation.
for (Value *V : VL) {
@@ -8671,16 +8629,16 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
Left.push_back(LHS);
Right.push_back(RHS);
}
+ TE->setOperand(0, Left);
+ TE->setOperand(1, Right);
+ buildTree_rec(Left, Depth + 1, {TE, 0});
+ buildTree_rec(Right, Depth + 1, {TE, 1});
+ return;
}
- TE->setOperand(0, Left);
- TE->setOperand(1, Right);
- buildTree_rec(Left, Depth + 1, {TE, 0});
- buildTree_rec(Right, Depth + 1, {TE, 1});
- return;
}
- TE->setOperandsInOrder();
- for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
+ TE->setOperand(Ops, VL0->getNumOperands());
+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
return;
}
@@ -13300,21 +13258,6 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
return Cost;
}
-// Perform operand reordering on the instructions in VL and return the reordered
-// operands in Left and Right.
-void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
- SmallVectorImpl<Value *> &Left,
- SmallVectorImpl<Value *> &Right,
- const BoUpSLP &R) {
- if (VL.empty())
- return;
- VLOperands Ops(VL, R);
- // Reorder the operands in place.
- Ops.reorder();
- Left = Ops.getVL(0);
- Right = Ops.getVL(1);
-}
-
Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
auto &Res = EntryToLastInstruction.try_emplace(E).first->second;
if (Res)
>From f6c5af86d6d505759cb29e01dad1908d3d1da735 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Tue, 19 Nov 2024 22:27:10 -0800
Subject: [PATCH 2/6] apply comment
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2f2af91618ca07..15589be3044bdd 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1951,7 +1951,7 @@ class BoUpSLP {
SmallVector<OperandDataVec, 4> OpsVec;
/// When VL[0] is IntrinsicInst, ArgSize is CallBase::arg_size. When VL[0]
/// is not IntrinsicInst, ArgSize is User::getNumOperands.
- unsigned ArgSize;
+ unsigned ArgSize = 0;
const TargetLibraryInfo &TLI;
const DataLayout &DL;
@@ -2546,8 +2546,8 @@ class BoUpSLP {
ArrayRef<OperandData> Op0 = OpsVec.front();
for (const OperandData &Data : Op0)
UniqueValues.insert(Data.V);
- for (ArrayRef<OperandData> Op : make_range(
- OpsVec.begin() + 1, OpsVec.begin() + getNumOperands())) {
+ for (ArrayRef<OperandData> Op :
+ ArrayRef(OpsVec).slice(1, getNumOperands() - 1)) {
if (any_of(Op, [&UniqueValues](const OperandData &Data) {
return !UniqueValues.contains(Data.V);
}))
>From f106d8349ad630ba32147bc34e17eedff2aa997b Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 21 Nov 2024 04:56:10 -0800
Subject: [PATCH 3/6] apply comment
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 43 ++++++++-----------
1 file changed, 17 insertions(+), 26 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 15589be3044bdd..316848a27bc733 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3263,9 +3263,14 @@ class BoUpSLP {
copy(OpVL, Operands[OpIdx].begin());
}
- /// Set this bundle's operand from \p Ops.
- void setOperand(const VLOperands &Ops, unsigned NumOperands) {
- for (unsigned I : seq(NumOperands))
+ /// Set this bundle's operand from \p VL.
+ void setOperand(ArrayRef<Value *> VL, const BoUpSLP &R,
+ bool RequireReorder = false) {
+ VLOperands Ops(VL, R);
+ if (RequireReorder)
+ Ops.reorder();
+ for (unsigned I :
+ seq<unsigned>(cast<Instruction>(VL[0])->getNumOperands()))
setOperand(I, Ops.getVL(I));
}
@@ -8315,8 +8320,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
{}, CurrentOrder);
LLVM_DEBUG(dbgs() << "SLP: added inserts bundle.\n");
- VLOperands Ops(VL, *this);
- TE->setOperand(Ops, VL0->getNumOperands());
+ TE->setOperand(VL, *this);
buildTree_rec(TE->getOperand(1), Depth + 1, {TE, 1});
return;
}
@@ -8354,8 +8358,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
case TreeEntry::NeedToGather:
llvm_unreachable("Unexpected loads state.");
}
- VLOperands Ops(VL, *this);
- TE->setOperand(Ops, VL0->getNumOperands());
+ TE->setOperand(VL, *this);
if (State == TreeEntry::ScatterVectorize)
buildTree_rec(PointerOps, Depth + 1, {TE, 0});
return;
@@ -8395,8 +8398,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
ReuseShuffleIndices);
LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
- VLOperands Ops(VL, *this);
- TE->setOperand(Ops, VL0->getNumOperands());
+ TE->setOperand(VL, *this);
for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
if (ShuffleOrOp == Instruction::Trunc) {
@@ -8488,12 +8490,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
ReuseShuffleIndices);
LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
- VLOperands Ops(VL, *this);
- // Sort operands of the instructions so that each side is more likely to
- // have the same opcode.
- if (isa<BinaryOperator>(VL0) && isCommutative(VL0))
- Ops.reorder();
- TE->setOperand(Ops, VL0->getNumOperands());
+ TE->setOperand(VL, *this, isa<BinaryOperator>(VL0) && isCommutative(VL0));
for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
return;
@@ -8559,8 +8556,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
fixupOrderingIndices(CurrentOrder);
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices, CurrentOrder);
- VLOperands Ops(VL, *this);
- TE->setOperand(Ops, VL0->getNumOperands());
+ TE->setOperand(VL, *this);
buildTree_rec(TE->getOperand(0), Depth + 1, {TE, 0});
if (Consecutive)
LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
@@ -8576,12 +8572,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices);
- VLOperands Ops(VL, *this);
- // Sort operands of the instructions so that each side is more likely to
- // have the same opcode.
- if (isCommutative(VL0))
- Ops.reorder();
- TE->setOperand(Ops, VL0->getNumOperands());
+ TE->setOperand(VL, *this, isCommutative(VL0));
for (unsigned I : seq<unsigned>(CI->arg_size())) {
// For scalar operands no need to create an entry since no need to
// vectorize it.
@@ -8596,14 +8587,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
ReuseShuffleIndices);
LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
- VLOperands Ops(VL, *this);
+ bool RequireReorder = false;
// Reorder operands if reordering would enable vectorization.
auto *CI = dyn_cast<CmpInst>(VL0);
if (isa<BinaryOperator>(VL0) || CI) {
if (!CI || all_of(VL, [](Value *V) {
return cast<CmpInst>(V)->isCommutative();
})) {
- Ops.reorder();
+ RequireReorder = true;
} else {
auto *MainCI = cast<CmpInst>(S.MainOp);
auto *AltCI = cast<CmpInst>(S.AltOp);
@@ -8637,7 +8628,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
}
- TE->setOperand(Ops, VL0->getNumOperands());
+ TE->setOperand(VL, *this, RequireReorder);
for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
return;
>From b532cfa9382b197a0a89f9a81fba19e65063fe4a Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 21 Nov 2024 20:05:18 -0800
Subject: [PATCH 4/6] apply comment
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 67 +++++++++----------
1 file changed, 31 insertions(+), 36 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 316848a27bc733..d8c596eb875d91 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -8587,48 +8587,43 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
ReuseShuffleIndices);
LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
- bool RequireReorder = false;
// Reorder operands if reordering would enable vectorization.
auto *CI = dyn_cast<CmpInst>(VL0);
- if (isa<BinaryOperator>(VL0) || CI) {
- if (!CI || all_of(VL, [](Value *V) {
- return cast<CmpInst>(V)->isCommutative();
- })) {
- RequireReorder = true;
- } else {
- auto *MainCI = cast<CmpInst>(S.MainOp);
- auto *AltCI = cast<CmpInst>(S.AltOp);
- CmpInst::Predicate MainP = MainCI->getPredicate();
- CmpInst::Predicate AltP = AltCI->getPredicate();
- assert(MainP != AltP &&
- "Expected different main/alternate predicates.");
- ValueList Left, Right;
- // Collect operands - commute if it uses the swapped predicate or
- // alternate operation.
- for (Value *V : VL) {
- auto *Cmp = cast<CmpInst>(V);
- Value *LHS = Cmp->getOperand(0);
- Value *RHS = Cmp->getOperand(1);
-
- if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
- if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
- std::swap(LHS, RHS);
- } else {
- if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
- std::swap(LHS, RHS);
- }
- Left.push_back(LHS);
- Right.push_back(RHS);
+ if (CI && any_of(VL, [](Value *V) {
+ return !cast<CmpInst>(V)->isCommutative();
+ })) {
+ auto *MainCI = cast<CmpInst>(S.MainOp);
+ auto *AltCI = cast<CmpInst>(S.AltOp);
+ CmpInst::Predicate MainP = MainCI->getPredicate();
+ CmpInst::Predicate AltP = AltCI->getPredicate();
+ assert(MainP != AltP &&
+ "Expected different main/alternate predicates.");
+ ValueList Left, Right;
+ // Collect operands - commute if it uses the swapped predicate or
+ // alternate operation.
+ for (Value *V : VL) {
+ auto *Cmp = cast<CmpInst>(V);
+ Value *LHS = Cmp->getOperand(0);
+ Value *RHS = Cmp->getOperand(1);
+
+ if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
+ if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
+ std::swap(LHS, RHS);
+ } else {
+ if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
+ std::swap(LHS, RHS);
}
- TE->setOperand(0, Left);
- TE->setOperand(1, Right);
- buildTree_rec(Left, Depth + 1, {TE, 0});
- buildTree_rec(Right, Depth + 1, {TE, 1});
- return;
+ Left.push_back(LHS);
+ Right.push_back(RHS);
}
+ TE->setOperand(0, Left);
+ TE->setOperand(1, Right);
+ buildTree_rec(Left, Depth + 1, {TE, 0});
+ buildTree_rec(Right, Depth + 1, {TE, 1});
+ return;
}
- TE->setOperand(VL, *this, RequireReorder);
+ TE->setOperand(VL, *this, isa<BinaryOperator>(VL0) || CI);
for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
return;
>From 664f2d333ee051c30693b95ff8c59cf262c35cce Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Mon, 25 Nov 2024 08:53:29 -0800
Subject: [PATCH 5/6] apply comment
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index d8c596eb875d91..54210cc0b14988 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3267,8 +3267,18 @@ class BoUpSLP {
void setOperand(ArrayRef<Value *> VL, const BoUpSLP &R,
bool RequireReorder = false) {
VLOperands Ops(VL, R);
- if (RequireReorder)
+ if (RequireReorder) {
Ops.reorder();
+ if (auto *CI = dyn_cast<CallInst>(VL[0])) {
+ Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, R.TLI);
+ for (unsigned I : seq<unsigned>(CI->arg_size())) {
+ if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
+ continue;
+ setOperand(I, Ops.getVL(I));
+ }
+ return;
+ }
+ }
for (unsigned I :
seq<unsigned>(cast<Instruction>(VL[0])->getNumOperands()))
setOperand(I, Ops.getVL(I));
>From 4e9392e572ac4ff8f9a8d1176a734073e8a5df83 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Tue, 3 Dec 2024 02:44:49 -0800
Subject: [PATCH 6/6] fix
llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 37 ++++++++++---------
1 file changed, 20 insertions(+), 17 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 54210cc0b14988..09c5137c0749a5 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3267,17 +3267,16 @@ class BoUpSLP {
void setOperand(ArrayRef<Value *> VL, const BoUpSLP &R,
bool RequireReorder = false) {
VLOperands Ops(VL, R);
- if (RequireReorder) {
+ if (RequireReorder)
Ops.reorder();
- if (auto *CI = dyn_cast<CallInst>(VL[0])) {
- Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, R.TLI);
- for (unsigned I : seq<unsigned>(CI->arg_size())) {
- if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
- continue;
- setOperand(I, Ops.getVL(I));
- }
- return;
+ if (auto *CI = dyn_cast<CallInst>(VL[0])) {
+ Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, R.TLI);
+ for (unsigned I : seq<unsigned>(CI->arg_size())) {
+ if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
+ continue;
+ setOperand(I, Ops.getVL(I));
}
+ return;
}
for (unsigned I :
seq<unsigned>(cast<Instruction>(VL[0])->getNumOperands()))
@@ -4063,11 +4062,10 @@ class BoUpSLP {
// immediates do not affect scheduler behavior this is considered
// okay.
auto *In = BundleMember->Inst;
- assert(
- In &&
- (isa<ExtractValueInst, ExtractElementInst, IntrinsicInst>(In) ||
- In->getNumOperands() == TE->getNumOperands()) &&
- "Missed TreeEntry operands?");
+ assert(In &&
+ (isa<ExtractValueInst, ExtractElementInst, CallInst>(In) ||
+ In->getNumOperands() == TE->getNumOperands()) &&
+ "Missed TreeEntry operands?");
(void)In; // fake use to avoid build failure when assertions disabled
for (unsigned OpIdx = 0, NumOperands = TE->getNumOperands();
@@ -16921,9 +16919,14 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
BundleMember->resetUnscheduledDeps();
// Handle def-use chain dependencies.
- for (User *U : BundleMember->Inst->users()) {
- if (ScheduleData *UseSD = getScheduleData(cast<Instruction>(U))) {
- BundleMember->Dependencies++;
+ for (Use &U : BundleMember->Inst->uses()) {
+ User *User = U.getUser();
+ if (ScheduleData *UseSD = getScheduleData(cast<Instruction>(User))) {
+ auto *CI = dyn_cast<CallInst>(User);
+ if (!CI ||
+ !isVectorIntrinsicWithScalarOpAtArg(
+ getVectorIntrinsicIDForCall(CI, SLP->TLI), U.getOperandNo()))
+ BundleMember->Dependencies++;
ScheduleData *DestBundle = UseSD->FirstInBundle;
if (!DestBundle->IsScheduled)
BundleMember->incrementUnscheduledDeps(1);
More information about the llvm-commits
mailing list