[llvm] [SLP] Fix PoisonValue in the argument VL of setOperand. (PR #118949)
Han-Kuan Chen via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 10 09:45:53 PST 2024
https://github.com/HanKuanChen updated https://github.com/llvm/llvm-project/pull/118949
From 2a81c2c03389511ff1a33e9374b0cb27afe8dbbd Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Fri, 6 Dec 2024 02:05:55 -0800
Subject: [PATCH 1/6] [SLP] Pre-commit test.
---
llvm/test/Transforms/SLPVectorizer/fix-113880.ll | 15 +++++++++++++++
1 file changed, 15 insertions(+)
create mode 100644 llvm/test/Transforms/SLPVectorizer/fix-113880.ll
diff --git a/llvm/test/Transforms/SLPVectorizer/fix-113880.ll b/llvm/test/Transforms/SLPVectorizer/fix-113880.ll
new file mode 100644
index 00000000000000..9d097c2081b0fd
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/fix-113880.ll
@@ -0,0 +1,15 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=slp-vectorizer -S -slp-max-reg-size=1024 %s | FileCheck %s
+
+define ptr @test() {
+ store double poison, ptr null, align 8
+ %1 = getelementptr i8, ptr null, i64 8
+ %2 = fmul double 0.000000e+00, 0.000000e+00
+ store double %2, ptr %1, align 8
+ %3 = getelementptr i8, ptr null, i64 16
+ %4 = fmul double 0.000000e+00, 0.000000e+00
+ store double %4, ptr %3, align 8
+ %5 = getelementptr i8, ptr null, i64 24
+ store double %2, ptr %5, align 8
+ ret ptr null
+}
From 4a407ef9e796729e61f4216b0a2cbe1321df6b21 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Fri, 6 Dec 2024 02:00:36 -0800
Subject: [PATCH 2/6] [SLP] Fix PoisonValue in the argument VL of setOperand.
In addition, since Scalars is already stored in the struct, there is no need to pass VL here.
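A minimal sketch of the pattern this patch moves to (the standalone helper name firstRealInstruction is illustrative only; in the patch the lookup lives inside TreeEntry::setOperand, as the hunks below show):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

static Instruction *firstRealInstruction(ArrayRef<Value *> Scalars) {
  // Before: cast<Instruction>(Scalars[0]) asserts when lane 0 is a
  // PoisonValue, as in the fix-113880.ll reproducer above.
  // After: skip non-instruction lanes and take the first real instruction,
  // matching the patch's find_if + IsaPred<Instruction> pattern.
  return cast<Instruction>(*find_if(Scalars, IsaPred<Instruction>));
}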
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 25 +++++++++----------
.../Transforms/SLPVectorizer/fix-113880.ll | 4 +++
2 files changed, 16 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 7ea039e04ca728..180238f4832293 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3338,14 +3338,13 @@ class BoUpSLP {
copy(OpVL, Operands[OpIdx].begin());
}
- /// Set this bundle's operand from \p VL.
- void setOperand(ArrayRef<Value *> VL, const BoUpSLP &R,
- bool RequireReorder = false) {
- VLOperands Ops(VL, R);
+ /// Set this bundle's operand from Scalars.
+ void setOperand(const BoUpSLP &R, bool RequireReorder = false) {
+ VLOperands Ops(Scalars, R);
if (RequireReorder)
Ops.reorder();
- for (unsigned I :
- seq<unsigned>(cast<Instruction>(VL[0])->getNumOperands()))
+ auto *I0 = cast<Instruction>(*find_if(Scalars, IsaPred<Instruction>));
+ for (unsigned I : seq<unsigned>(I0->getNumOperands()))
setOperand(I, Ops.getVL(I));
}
@@ -8446,7 +8445,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
{}, CurrentOrder);
LLVM_DEBUG(dbgs() << "SLP: added inserts bundle.\n");
- TE->setOperand(VL, *this);
+ TE->setOperand(*this);
buildTree_rec(TE->getOperand(1), Depth + 1, {TE, 1});
return;
}
@@ -8484,7 +8483,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
case TreeEntry::NeedToGather:
llvm_unreachable("Unexpected loads state.");
}
- TE->setOperand(VL, *this);
+ TE->setOperand(*this);
if (State == TreeEntry::ScatterVectorize)
buildTree_rec(PointerOps, Depth + 1, {TE, 0});
return;
@@ -8524,7 +8523,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
ReuseShuffleIndices);
LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
- TE->setOperand(VL, *this);
+ TE->setOperand(*this);
for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
if (ShuffleOrOp == Instruction::Trunc) {
@@ -8621,7 +8620,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
ReuseShuffleIndices);
LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
- TE->setOperand(VL, *this, isa<BinaryOperator>(VL0) && isCommutative(VL0));
+ TE->setOperand(*this, isa<BinaryOperator>(VL0) && isCommutative(VL0));
for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
return;
@@ -8687,7 +8686,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
fixupOrderingIndices(CurrentOrder);
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices, CurrentOrder);
- TE->setOperand(VL, *this);
+ TE->setOperand(*this);
buildTree_rec(TE->getOperand(0), Depth + 1, {TE, 0});
if (Consecutive)
LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
@@ -8703,7 +8702,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices);
- TE->setOperand(VL, *this, isCommutative(VL0));
+ TE->setOperand(*this, isCommutative(VL0));
for (unsigned I : seq<unsigned>(CI->arg_size())) {
// For scalar operands no need to create an entry since no need to
// vectorize it.
@@ -8759,7 +8758,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
return;
}
- TE->setOperand(VL, *this, isa<BinaryOperator>(VL0) || CI);
+ TE->setOperand(*this, isa<BinaryOperator>(VL0) || CI);
for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
return;
diff --git a/llvm/test/Transforms/SLPVectorizer/fix-113880.ll b/llvm/test/Transforms/SLPVectorizer/fix-113880.ll
index 9d097c2081b0fd..6c3c95b0d9359f 100644
--- a/llvm/test/Transforms/SLPVectorizer/fix-113880.ll
+++ b/llvm/test/Transforms/SLPVectorizer/fix-113880.ll
@@ -2,6 +2,10 @@
; RUN: opt -passes=slp-vectorizer -S -slp-max-reg-size=1024 %s | FileCheck %s
define ptr @test() {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: store <4 x double> <double poison, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00>, ptr null, align 8
+; CHECK-NEXT: ret ptr null
+;
store double poison, ptr null, align 8
%1 = getelementptr i8, ptr null, i64 8
%2 = fmul double 0.000000e+00, 0.000000e+00
From bd9491e6b310e06ca92843e58fde107a5d671ada Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Fri, 6 Dec 2024 06:24:42 -0800
Subject: [PATCH 3/6] apply comment
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 21 +++++++++++--------
1 file changed, 12 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 180238f4832293..23d4823f72dee4 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2403,14 +2403,13 @@ class BoUpSLP {
}
/// Go through the instructions in VL and append their operands.
- void appendOperandsOfVL(ArrayRef<Value *> VL) {
+ void appendOperandsOfVL(ArrayRef<Value *> VL, Instruction *VL0) {
assert(!VL.empty() && "Bad VL");
assert((empty() || VL.size() == getNumLanes()) &&
"Expected same number of lanes");
// IntrinsicInst::isCommutative returns true if swapping the first "two"
// arguments to the intrinsic produces the same result.
constexpr unsigned IntrinsicNumOperands = 2;
- auto *VL0 = cast<Instruction>(*find_if(VL, IsaPred<Instruction>));
unsigned NumOperands = VL0->getNumOperands();
ArgSize = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands : NumOperands;
OpsVec.resize(NumOperands);
@@ -2542,15 +2541,19 @@ class BoUpSLP {
public:
/// Initialize with all the operands of the instruction vector \p RootVL.
- VLOperands(ArrayRef<Value *> RootVL, const BoUpSLP &R)
+ VLOperands(ArrayRef<Value *> RootVL, const BoUpSLP &R, Instruction *VL0)
: TLI(*R.TLI), DL(*R.DL), SE(*R.SE), R(R),
- L(R.LI->getLoopFor(
- (cast<Instruction>(*find_if(RootVL, IsaPred<Instruction>))
- ->getParent()))) {
+ L(R.LI->getLoopFor((VL0->getParent()))) {
// Append all the operands of RootVL.
- appendOperandsOfVL(RootVL);
+ appendOperandsOfVL(RootVL, VL0);
}
+ /// Initialize with all the operands of the instruction vector \p RootVL.
+ VLOperands(ArrayRef<Value *> RootVL, const BoUpSLP &R)
+ : VLOperands(
+ RootVL, R,
+ cast<Instruction>(*find_if(RootVL, IsaPred<Instruction>))) {}
+
/// \Returns a value vector with the operands across all lanes for the
/// opearnd at \p OpIdx.
ValueList getVL(unsigned OpIdx) const {
@@ -3340,10 +3343,10 @@ class BoUpSLP {
/// Set this bundle's operand from Scalars.
void setOperand(const BoUpSLP &R, bool RequireReorder = false) {
- VLOperands Ops(Scalars, R);
+ auto *I0 = cast<Instruction>(*find_if(Scalars, IsaPred<Instruction>));
+ VLOperands Ops(Scalars, R, I0);
if (RequireReorder)
Ops.reorder();
- auto *I0 = cast<Instruction>(*find_if(Scalars, IsaPred<Instruction>));
for (unsigned I : seq<unsigned>(I0->getNumOperands()))
setOperand(I, Ops.getVL(I));
}
From e9b8dee30608a017d395f8546461b8c2986de8bb Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Sun, 8 Dec 2024 23:17:55 -0800
Subject: [PATCH 4/6] VL0 is already calculated by getSameOpcode
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 33 ++++++++-----------
1 file changed, 14 insertions(+), 19 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 23d4823f72dee4..f82b7c50d62fec 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2541,19 +2541,13 @@ class BoUpSLP {
public:
/// Initialize with all the operands of the instruction vector \p RootVL.
- VLOperands(ArrayRef<Value *> RootVL, const BoUpSLP &R, Instruction *VL0)
+ VLOperands(ArrayRef<Value *> RootVL, Instruction *VL0, const BoUpSLP &R)
: TLI(*R.TLI), DL(*R.DL), SE(*R.SE), R(R),
L(R.LI->getLoopFor((VL0->getParent()))) {
// Append all the operands of RootVL.
appendOperandsOfVL(RootVL, VL0);
}
- /// Initialize with all the operands of the instruction vector \p RootVL.
- VLOperands(ArrayRef<Value *> RootVL, const BoUpSLP &R)
- : VLOperands(
- RootVL, R,
- cast<Instruction>(*find_if(RootVL, IsaPred<Instruction>))) {}
-
/// \Returns a value vector with the operands across all lanes for the
/// opearnd at \p OpIdx.
ValueList getVL(unsigned OpIdx) const {
@@ -3342,12 +3336,12 @@ class BoUpSLP {
}
/// Set this bundle's operand from Scalars.
- void setOperand(const BoUpSLP &R, bool RequireReorder = false) {
- auto *I0 = cast<Instruction>(*find_if(Scalars, IsaPred<Instruction>));
- VLOperands Ops(Scalars, R, I0);
+ void setOperand(Instruction *VL0, const BoUpSLP &R,
+ bool RequireReorder = false) {
+ VLOperands Ops(Scalars, VL0, R);
if (RequireReorder)
Ops.reorder();
- for (unsigned I : seq<unsigned>(I0->getNumOperands()))
+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
setOperand(I, Ops.getVL(I));
}
@@ -8448,7 +8442,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
{}, CurrentOrder);
LLVM_DEBUG(dbgs() << "SLP: added inserts bundle.\n");
- TE->setOperand(*this);
+ TE->setOperand(VL0, *this);
buildTree_rec(TE->getOperand(1), Depth + 1, {TE, 1});
return;
}
@@ -8486,7 +8480,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
case TreeEntry::NeedToGather:
llvm_unreachable("Unexpected loads state.");
}
- TE->setOperand(*this);
+ TE->setOperand(VL0, *this);
if (State == TreeEntry::ScatterVectorize)
buildTree_rec(PointerOps, Depth + 1, {TE, 0});
return;
@@ -8526,7 +8520,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
ReuseShuffleIndices);
LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
- TE->setOperand(*this);
+ TE->setOperand(VL0, *this);
for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
if (ShuffleOrOp == Instruction::Trunc) {
@@ -8554,7 +8548,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
ValueList Left, Right;
- VLOperands Ops(VL, *this);
+ VLOperands Ops(VL, VL0, *this);
if (cast<CmpInst>(VL0)->isCommutative()) {
// Commutative predicate - collect + sort operands of the instructions
// so that each side is more likely to have the same opcode.
@@ -8623,7 +8617,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
ReuseShuffleIndices);
LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
- TE->setOperand(*this, isa<BinaryOperator>(VL0) && isCommutative(VL0));
+ TE->setOperand(VL0, *this,
+ isa<BinaryOperator>(VL0) && isCommutative(VL0));
for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
return;
@@ -8689,7 +8684,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
fixupOrderingIndices(CurrentOrder);
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices, CurrentOrder);
- TE->setOperand(*this);
+ TE->setOperand(VL0, *this);
buildTree_rec(TE->getOperand(0), Depth + 1, {TE, 0});
if (Consecutive)
LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
@@ -8705,7 +8700,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices);
- TE->setOperand(*this, isCommutative(VL0));
+ TE->setOperand(VL0, *this, isCommutative(VL0));
for (unsigned I : seq<unsigned>(CI->arg_size())) {
// For scalar operands no need to create an entry since no need to
// vectorize it.
@@ -8761,7 +8756,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
return;
}
- TE->setOperand(*this, isa<BinaryOperator>(VL0) || CI);
+ TE->setOperand(VL0, *this, isa<BinaryOperator>(VL0) || CI);
for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
return;
From 5b5fc900761b79ff3f59ac8251fbef49a862893a Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Fri, 6 Dec 2024 12:03:23 +0800
Subject: [PATCH 5/6] [SLP] NFC. Replace TreeEntry::setOperandsInOrder with
VLOperands. (#113880)
To reduce duplicated code, TreeEntry::setOperandsInOrder will be replaced
by VLOperands.
An ArgSize field will be provided to make sure the other operands are not
reordered when VL[0] is an IntrinsicInst (because APO is a boolean value).
In addition, BoUpSLP::reorderInputsAccordingToOpcode will also be removed,
since it is now a thin wrapper around VLOperands.
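For reference, a sketch of the operand-count cap (the helper name reorderableOperandCount is illustrative only; in the patch this is the ArgSize field computed in appendOperandsOfVL, as the hunks below show):

#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

static unsigned reorderableOperandCount(const Instruction *VL0) {
  // IntrinsicInst::isCommutative only covers swapping the first two
  // arguments, and APO is a single boolean per operand, so reordering is
  // capped at two operands for intrinsics.
  constexpr unsigned IntrinsicNumOperands = 2;
  return isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands
                                 : VL0->getNumOperands();
}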
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 207 ++++++------------
1 file changed, 65 insertions(+), 142 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index c729c6cc9195ef..30f7368545e340 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2019,6 +2019,9 @@ class BoUpSLP {
/// A vector of operand vectors.
SmallVector<OperandDataVec, 4> OpsVec;
+ /// When VL[0] is IntrinsicInst, ArgSize is CallBase::arg_size. When VL[0]
+ /// is not IntrinsicInst, ArgSize is User::getNumOperands.
+ unsigned ArgSize = 0;
const TargetLibraryInfo &TLI;
const DataLayout &DL;
@@ -2406,10 +2409,12 @@ class BoUpSLP {
assert(!VL.empty() && "Bad VL");
assert((empty() || VL.size() == getNumLanes()) &&
"Expected same number of lanes");
+ // IntrinsicInst::isCommutative returns true if swapping the first "two"
+ // arguments to the intrinsic produces the same result.
constexpr unsigned IntrinsicNumOperands = 2;
auto *VL0 = cast<Instruction>(*find_if(VL, IsaPred<Instruction>));
- unsigned NumOperands = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands
- : VL0->getNumOperands();
+ unsigned NumOperands = VL0->getNumOperands();
+ ArgSize = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands : NumOperands;
OpsVec.resize(NumOperands);
unsigned NumLanes = VL.size();
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
@@ -2442,7 +2447,7 @@ class BoUpSLP {
}
/// \returns the number of operands.
- unsigned getNumOperands() const { return OpsVec.size(); }
+ unsigned getNumOperands() const { return ArgSize; }
/// \returns the number of lanes.
unsigned getNumLanes() const { return OpsVec[0].size(); }
@@ -2623,7 +2628,8 @@ class BoUpSLP {
ArrayRef<OperandData> Op0 = OpsVec.front();
for (const OperandData &Data : Op0)
UniqueValues.insert(Data.V);
- for (ArrayRef<OperandData> Op : drop_begin(OpsVec, 1)) {
+ for (ArrayRef<OperandData> Op :
+ ArrayRef(OpsVec).slice(1, getNumOperands() - 1)) {
if (any_of(Op, [&UniqueValues](const OperandData &Data) {
return !UniqueValues.contains(Data.V);
}))
@@ -3144,13 +3150,6 @@ class BoUpSLP {
SmallVector<SmallVector<std::pair<LoadInst *, int>>>,
8> &GatheredLoads);
- /// Reorder commutative or alt operands to get better probability of
- /// generating vectorized code.
- static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
- SmallVectorImpl<Value *> &Left,
- SmallVectorImpl<Value *> &Right,
- const BoUpSLP &R);
-
/// Helper for `findExternalStoreUsersReorderIndices()`. It iterates over the
/// users of \p TE and collects the stores. It returns the map from the store
/// pointers to the collected stores.
@@ -3345,27 +3344,15 @@ class BoUpSLP {
copy(OpVL, Operands[OpIdx].begin());
}
- /// Set the operands of this bundle in their original order.
- void setOperandsInOrder() {
- assert(Operands.empty() && "Already initialized?");
- auto *I0 = cast<Instruction>(*find_if(Scalars, IsaPred<Instruction>));
- Operands.resize(I0->getNumOperands());
- unsigned NumLanes = Scalars.size();
- for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
- OpIdx != NumOperands; ++OpIdx) {
- Operands[OpIdx].resize(NumLanes);
- for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
- if (isa<PoisonValue>(Scalars[Lane])) {
- Operands[OpIdx][Lane] =
- PoisonValue::get(I0->getOperand(OpIdx)->getType());
- continue;
- }
- auto *I = cast<Instruction>(Scalars[Lane]);
- assert(I->getNumOperands() == NumOperands &&
- "Expected same number of operands");
- Operands[OpIdx][Lane] = I->getOperand(OpIdx);
- }
- }
+ /// Set this bundle's operand from \p VL.
+ void setOperand(ArrayRef<Value *> VL, const BoUpSLP &R,
+ bool RequireReorder = false) {
+ VLOperands Ops(VL, R);
+ if (RequireReorder)
+ Ops.reorder();
+ for (unsigned I :
+ seq<unsigned>(cast<Instruction>(VL[0])->getNumOperands()))
+ setOperand(I, Ops.getVL(I));
}
/// Reorders operands of the node to the given mask \p Mask.
@@ -8471,7 +8458,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
{}, CurrentOrder);
LLVM_DEBUG(dbgs() << "SLP: added inserts bundle.\n");
- TE->setOperandsInOrder();
+ TE->setOperand(VL, *this);
buildTree_rec(TE->getOperand(1), Depth + 1, {TE, 1});
return;
}
@@ -8492,27 +8479,26 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
else
LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
- TE->setOperandsInOrder();
break;
case TreeEntry::StridedVectorize:
// Vectorizing non-consecutive loads with `llvm.masked.gather`.
TE = newTreeEntry(VL, TreeEntry::StridedVectorize, Bundle, S,
UserTreeIdx, ReuseShuffleIndices, CurrentOrder);
- TE->setOperandsInOrder();
LLVM_DEBUG(dbgs() << "SLP: added a vector of strided loads.\n");
break;
case TreeEntry::ScatterVectorize:
// Vectorizing non-consecutive loads with `llvm.masked.gather`.
TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle, S,
UserTreeIdx, ReuseShuffleIndices);
- TE->setOperandsInOrder();
- buildTree_rec(PointerOps, Depth + 1, {TE, 0});
LLVM_DEBUG(dbgs() << "SLP: added a vector of non-consecutive loads.\n");
break;
case TreeEntry::CombinedVectorize:
case TreeEntry::NeedToGather:
llvm_unreachable("Unexpected loads state.");
}
+ TE->setOperand(VL, *this);
+ if (State == TreeEntry::ScatterVectorize)
+ buildTree_rec(PointerOps, Depth + 1, {TE, 0});
return;
}
case Instruction::ZExt:
@@ -8550,8 +8536,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
ReuseShuffleIndices);
LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
- TE->setOperandsInOrder();
- for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
+ TE->setOperand(VL, *this);
+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
if (ShuffleOrOp == Instruction::Trunc) {
ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx);
@@ -8578,12 +8564,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
ValueList Left, Right;
+ VLOperands Ops(VL, *this);
if (cast<CmpInst>(VL0)->isCommutative()) {
// Commutative predicate - collect + sort operands of the instructions
// so that each side is more likely to have the same opcode.
assert(P0 == CmpInst::getSwappedPredicate(P0) &&
"Commutative Predicate mismatch");
- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
+ Ops.reorder();
+ Left = Ops.getVL(0);
+ Right = Ops.getVL(1);
} else {
// Collect operands - commute if it uses the swapped predicate.
for (Value *V : VL) {
@@ -8644,20 +8633,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
ReuseShuffleIndices);
LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
- // Sort operands of the instructions so that each side is more likely to
- // have the same opcode.
- if (isa<BinaryOperator>(VL0) && isCommutative(VL0)) {
- ValueList Left, Right;
- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
- TE->setOperand(0, Left);
- TE->setOperand(1, Right);
- buildTree_rec(Left, Depth + 1, {TE, 0});
- buildTree_rec(Right, Depth + 1, {TE, 1});
- return;
- }
-
- TE->setOperandsInOrder();
- for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
+ TE->setOperand(VL, *this, isa<BinaryOperator>(VL0) && isCommutative(VL0));
+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
return;
}
@@ -8722,7 +8699,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
fixupOrderingIndices(CurrentOrder);
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices, CurrentOrder);
- TE->setOperandsInOrder();
+ TE->setOperand(VL, *this);
buildTree_rec(TE->getOperand(0), Depth + 1, {TE, 0});
if (Consecutive)
LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
@@ -8738,46 +8715,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices);
- // Sort operands of the instructions so that each side is more likely to
- // have the same opcode.
- if (isCommutative(VL0)) {
- ValueList Left, Right;
- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
- TE->setOperand(0, Left);
- TE->setOperand(1, Right);
- SmallVector<ValueList> Operands;
- for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
- Operands.emplace_back();
- if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
- continue;
- for (Value *V : VL) {
- auto *CI2 = cast<CallInst>(V);
- Operands.back().push_back(CI2->getArgOperand(I));
- }
- TE->setOperand(I, Operands.back());
- }
- buildTree_rec(Left, Depth + 1, {TE, 0});
- buildTree_rec(Right, Depth + 1, {TE, 1});
- for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
- if (Operands[I - 2].empty())
- continue;
- buildTree_rec(Operands[I - 2], Depth + 1, {TE, I});
- }
- return;
- }
- TE->setOperandsInOrder();
- for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
+ TE->setOperand(VL, *this, isCommutative(VL0));
+ for (unsigned I : seq<unsigned>(CI->arg_size())) {
// For scalar operands no need to create an entry since no need to
// vectorize it.
if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
continue;
- ValueList Operands;
- // Prepare the operand vector.
- for (Value *V : VL) {
- auto *CI2 = cast<CallInst>(V);
- Operands.push_back(CI2->getArgOperand(I));
- }
- buildTree_rec(Operands, Depth + 1, {TE, I});
+ buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
}
return;
}
@@ -8788,43 +8732,37 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Reorder operands if reordering would enable vectorization.
auto *CI = dyn_cast<CmpInst>(VL0);
- if (isa<BinaryOperator>(VL0) || CI) {
+ if (CI && any_of(VL, [](Value *V) {
+ return !isa<PoisonValue>(V) && !cast<CmpInst>(V)->isCommutative();
+ })) {
+ auto *MainCI = cast<CmpInst>(S.getMainOp());
+ auto *AltCI = cast<CmpInst>(S.getAltOp());
+ CmpInst::Predicate MainP = MainCI->getPredicate();
+ CmpInst::Predicate AltP = AltCI->getPredicate();
+ assert(MainP != AltP &&
+ "Expected different main/alternate predicates.");
ValueList Left, Right;
- if (!CI || all_of(VL, [](Value *V) {
- return isa<PoisonValue>(V) || cast<CmpInst>(V)->isCommutative();
- })) {
- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
- } else {
- auto *MainCI = cast<CmpInst>(S.getMainOp());
- auto *AltCI = cast<CmpInst>(S.getAltOp());
- CmpInst::Predicate MainP = MainCI->getPredicate();
- CmpInst::Predicate AltP = AltCI->getPredicate();
- assert(MainP != AltP &&
- "Expected different main/alternate predicates.");
- // Collect operands - commute if it uses the swapped predicate or
- // alternate operation.
- for (Value *V : VL) {
- if (isa<PoisonValue>(V)) {
- Left.push_back(
- PoisonValue::get(MainCI->getOperand(0)->getType()));
- Right.push_back(
- PoisonValue::get(MainCI->getOperand(1)->getType()));
- continue;
- }
- auto *Cmp = cast<CmpInst>(V);
- Value *LHS = Cmp->getOperand(0);
- Value *RHS = Cmp->getOperand(1);
+ // Collect operands - commute if it uses the swapped predicate or
+ // alternate operation.
+ for (Value *V : VL) {
+ if (isa<PoisonValue>(V)) {
+ Left.push_back(PoisonValue::get(MainCI->getOperand(0)->getType()));
+ Right.push_back(PoisonValue::get(MainCI->getOperand(1)->getType()));
+ continue;
+ }
+ auto *Cmp = cast<CmpInst>(V);
+ Value *LHS = Cmp->getOperand(0);
+ Value *RHS = Cmp->getOperand(1);
- if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
- if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
- std::swap(LHS, RHS);
- } else {
- if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
- std::swap(LHS, RHS);
- }
- Left.push_back(LHS);
- Right.push_back(RHS);
+ if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
+ if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
+ std::swap(LHS, RHS);
+ } else {
+ if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
+ std::swap(LHS, RHS);
}
+ Left.push_back(LHS);
+ Right.push_back(RHS);
}
TE->setOperand(0, Left);
TE->setOperand(1, Right);
@@ -8833,8 +8771,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
return;
}
- TE->setOperandsInOrder();
- for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
+ TE->setOperand(VL, *this, isa<BinaryOperator>(VL0) || CI);
+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
return;
}
@@ -13539,21 +13477,6 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
return Cost;
}
-// Perform operand reordering on the instructions in VL and return the reordered
-// operands in Left and Right.
-void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
- SmallVectorImpl<Value *> &Left,
- SmallVectorImpl<Value *> &Right,
- const BoUpSLP &R) {
- if (VL.empty())
- return;
- VLOperands Ops(VL, R);
- // Reorder the operands in place.
- Ops.reorder();
- Left = Ops.getVL(0);
- Right = Ops.getVL(1);
-}
-
Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
auto &Res = EntryToLastInstruction.try_emplace(E).first->second;
if (Res)
From 142bd19865755dedadf23d13e9ccff818103987c Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Tue, 10 Dec 2024 09:42:20 -0800
Subject: [PATCH 6/6] apply comment (do not pass VL0 to setOperand)
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 22 +++++++++----------
1 file changed, 10 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 81c6d85484206b..73bb110faadb64 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3342,12 +3342,11 @@ class BoUpSLP {
}
/// Set this bundle's operand from Scalars.
- void setOperand(Instruction *VL0, const BoUpSLP &R,
- bool RequireReorder = false) {
- VLOperands Ops(Scalars, VL0, R);
+ void setOperand(const BoUpSLP &R, bool RequireReorder = false) {
+ VLOperands Ops(Scalars, MainOp, R);
if (RequireReorder)
Ops.reorder();
- for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
+ for (unsigned I : seq<unsigned>(MainOp->getNumOperands()))
setOperand(I, Ops.getVL(I));
}
@@ -8454,7 +8453,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
{}, CurrentOrder);
LLVM_DEBUG(dbgs() << "SLP: added inserts bundle.\n");
- TE->setOperand(VL0, *this);
+ TE->setOperand(*this);
buildTree_rec(TE->getOperand(1), Depth + 1, {TE, 1});
return;
}
@@ -8492,7 +8491,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
case TreeEntry::NeedToGather:
llvm_unreachable("Unexpected loads state.");
}
- TE->setOperand(VL0, *this);
+ TE->setOperand(*this);
if (State == TreeEntry::ScatterVectorize)
buildTree_rec(PointerOps, Depth + 1, {TE, 0});
return;
@@ -8532,7 +8531,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
ReuseShuffleIndices);
LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
- TE->setOperand(VL0, *this);
+ TE->setOperand(*this);
for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
if (ShuffleOrOp == Instruction::Trunc) {
@@ -8629,8 +8628,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
ReuseShuffleIndices);
LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
- TE->setOperand(VL0, *this,
- isa<BinaryOperator>(VL0) && isCommutative(VL0));
+ TE->setOperand(*this, isa<BinaryOperator>(VL0) && isCommutative(VL0));
for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
return;
@@ -8696,7 +8694,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
fixupOrderingIndices(CurrentOrder);
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices, CurrentOrder);
- TE->setOperand(VL0, *this);
+ TE->setOperand(*this);
buildTree_rec(TE->getOperand(0), Depth + 1, {TE, 0});
if (Consecutive)
LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
@@ -8712,7 +8710,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices);
- TE->setOperand(VL0, *this, isCommutative(VL0));
+ TE->setOperand(*this, isCommutative(VL0));
for (unsigned I : seq<unsigned>(CI->arg_size())) {
// For scalar operands no need to create an entry since no need to
// vectorize it.
@@ -8768,7 +8766,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
return;
}
- TE->setOperand(VL0, *this, isa<BinaryOperator>(VL0) || CI);
+ TE->setOperand(*this, isa<BinaryOperator>(VL0) || CI);
for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
return;