[llvm] 42bce72 - Reapply "[SLP] Extend reordering data of tree entry to support PHInodes".
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 8 07:53:32 PST 2022
Author: skc7
Date: 2022-11-08T21:21:28+05:30
New Revision: 42bce72536ad38e3ddf01359b2f85cda649679b3
URL: https://github.com/llvm/llvm-project/commit/42bce72536ad38e3ddf01359b2f85cda649679b3
DIFF: https://github.com/llvm/llvm-project/commit/42bce72536ad38e3ddf01359b2f85cda649679b3.diff
LOG: Reapply "[SLP] Extend reordering data of tree entry to support PHInodes".
Reapplies 87a2086 (which was reverted in 656f1d8).
Fix for scalable vectors in getInsertIndex merged in 46d53f4.
Reviewed By: ABataev
Differential Revision: https://reviews.llvm.org/D137537
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index eb22d9f19e049..1449cec3b6895 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3797,6 +3797,51 @@ BoUpSLP::findPartiallyOrderedLoads(const BoUpSLP::TreeEntry &TE) {
return None;
}
+/// Check if two insertelement instructions are from the same buildvector.
+static bool areTwoInsertFromSameBuildVector(
+ InsertElementInst *VU, InsertElementInst *V,
+ function_ref<Value *(InsertElementInst *)> GetBaseOperand) {
+ // Instructions must be from the same basic blocks.
+ if (VU->getParent() != V->getParent())
+ return false;
+ // Checks if 2 insertelements are from the same buildvector.
+ if (VU->getType() != V->getType())
+ return false;
+ // Multiple used inserts are separate nodes.
+ if (!VU->hasOneUse() && !V->hasOneUse())
+ return false;
+ auto *IE1 = VU;
+ auto *IE2 = V;
+ Optional<unsigned> Idx1 = getInsertIndex(IE1);
+ Optional<unsigned> Idx2 = getInsertIndex(IE2);
+ if (Idx1 == None || Idx2 == None)
+ return false;
+ // Go through the vector operand of insertelement instructions trying to find
+ // either VU as the original vector for IE2 or V as the original vector for
+ // IE1.
+ do {
+ if (IE2 == VU)
+ return VU->hasOneUse();
+ if (IE1 == V)
+ return V->hasOneUse();
+ if (IE1) {
+ if ((IE1 != VU && !IE1->hasOneUse()) ||
+ getInsertIndex(IE1).value_or(*Idx2) == *Idx2)
+ IE1 = nullptr;
+ else
+ IE1 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE1));
+ }
+ if (IE2) {
+ if ((IE2 != V && !IE2->hasOneUse()) ||
+ getInsertIndex(IE2).value_or(*Idx1) == *Idx1)
+ IE2 = nullptr;
+ else
+ IE2 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE2));
+ }
+ } while (IE1 || IE2);
+ return false;
+}
+
Optional<BoUpSLP::OrdersType> BoUpSLP::getReorderingData(const TreeEntry &TE,
bool TopToBottom) {
// No need to reorder if need to shuffle reuses, still need to shuffle the
@@ -3860,6 +3905,58 @@ Optional<BoUpSLP::OrdersType> BoUpSLP::getReorderingData(const TreeEntry &TE,
(TopToBottom && isa<StoreInst, InsertElementInst>(TE.getMainOp()))) &&
!TE.isAltShuffle())
return TE.ReorderIndices;
+ if (TE.State == TreeEntry::Vectorize && TE.getOpcode() == Instruction::PHI) {
+ auto PHICompare = [](llvm::Value *V1, llvm::Value *V2) {
+ if (!V1->hasOneUse() || !V2->hasOneUse())
+ return false;
+ auto *FirstUserOfPhi1 = cast<Instruction>(*V1->user_begin());
+ auto *FirstUserOfPhi2 = cast<Instruction>(*V2->user_begin());
+ if (auto *IE1 = dyn_cast<InsertElementInst>(FirstUserOfPhi1))
+ if (auto *IE2 = dyn_cast<InsertElementInst>(FirstUserOfPhi2)) {
+ if (!areTwoInsertFromSameBuildVector(
+ IE1, IE2,
+ [](InsertElementInst *II) { return II->getOperand(0); }))
+ return false;
+ Optional<unsigned> Idx1 = getInsertIndex(IE1);
+ Optional<unsigned> Idx2 = getInsertIndex(IE2);
+ if (Idx1 == None || Idx2 == None)
+ return false;
+ return *Idx1 < *Idx2;
+ }
+ if (auto *EE1 = dyn_cast<ExtractElementInst>(FirstUserOfPhi1))
+ if (auto *EE2 = dyn_cast<ExtractElementInst>(FirstUserOfPhi2)) {
+ if (EE1->getOperand(0) != EE2->getOperand(0))
+ return false;
+ Optional<unsigned> Idx1 = getExtractIndex(EE1);
+ Optional<unsigned> Idx2 = getExtractIndex(EE2);
+ if (Idx1 == None || Idx2 == None)
+ return false;
+ return *Idx1 < *Idx2;
+ }
+ return false;
+ };
+ auto IsIdentityOrder = [](const OrdersType &Order) {
+ for (unsigned Idx : seq<unsigned>(0, Order.size()))
+ if (Idx != Order[Idx])
+ return false;
+ return true;
+ };
+ if (!TE.ReorderIndices.empty())
+ return TE.ReorderIndices;
+ DenseMap<Value *, unsigned> PhiToId;
+ SmallVector<Value *, 4> Phis;
+ OrdersType ResOrder(TE.Scalars.size());
+ for (unsigned Id = 0, Sz = TE.Scalars.size(); Id < Sz; ++Id) {
+ PhiToId[TE.Scalars[Id]] = Id;
+ Phis.push_back(TE.Scalars[Id]);
+ }
+ llvm::stable_sort(Phis, PHICompare);
+ for (unsigned Id = 0, Sz = Phis.size(); Id < Sz; ++Id)
+ ResOrder[Id] = PhiToId[Phis[Id]];
+ if (IsIdentityOrder(ResOrder))
+ return {};
+ return ResOrder;
+ }
if (TE.State == TreeEntry::NeedToGather) {
// TODO: add analysis of other gather nodes with extractelement
// instructions and other values/instructions, not only undefs.
@@ -3937,6 +4034,9 @@ void BoUpSLP::reorderTopToBottom() {
// their ordering.
DenseMap<const TreeEntry *, OrdersType> GathersToOrders;
+ // Phi nodes can have preferred ordering based on their result users
+ DenseMap<const TreeEntry *, OrdersType> PhisToOrders;
+
// AltShuffles can also have a preferred ordering that leads to fewer
// instructions, e.g., the addsub instruction in x86.
DenseMap<const TreeEntry *, OrdersType> AltShufflesToOrders;
@@ -3951,7 +4051,7 @@ void BoUpSLP::reorderTopToBottom() {
// extracts.
for_each(VectorizableTree, [this, &TTIRef, &VFToOrderedEntries,
&GathersToOrders, &ExternalUserReorderMap,
- &AltShufflesToOrders](
+ &AltShufflesToOrders, &PhisToOrders](
const std::unique_ptr<TreeEntry> &TE) {
// Look for external users that will probably be vectorized.
SmallVector<OrdersType, 1> ExternalUserReorderIndices =
@@ -4008,6 +4108,9 @@ void BoUpSLP::reorderTopToBottom() {
VFToOrderedEntries[TE->getVectorFactor()].insert(TE.get());
if (TE->State != TreeEntry::Vectorize || !TE->ReuseShuffleIndices.empty())
GathersToOrders.try_emplace(TE.get(), *CurrentOrder);
+ if (TE->State == TreeEntry::Vectorize &&
+ TE->getOpcode() == Instruction::PHI)
+ PhisToOrders.try_emplace(TE.get(), *CurrentOrder);
}
});
@@ -4033,8 +4136,8 @@ void BoUpSLP::reorderTopToBottom() {
if (!OpTE->ReuseShuffleIndices.empty() && !GathersToOrders.count(OpTE))
continue;
// Count number of orders uses.
- const auto &Order = [OpTE, &GathersToOrders,
- &AltShufflesToOrders]() -> const OrdersType & {
+ const auto &Order = [OpTE, &GathersToOrders, &AltShufflesToOrders,
+ &PhisToOrders]() -> const OrdersType & {
if (OpTE->State == TreeEntry::NeedToGather ||
!OpTE->ReuseShuffleIndices.empty()) {
auto It = GathersToOrders.find(OpTE);
@@ -4046,6 +4149,12 @@ void BoUpSLP::reorderTopToBottom() {
if (It != AltShufflesToOrders.end())
return It->second;
}
+ if (OpTE->State == TreeEntry::Vectorize &&
+ OpTE->getOpcode() == Instruction::PHI) {
+ auto It = PhisToOrders.find(OpTE);
+ if (It != PhisToOrders.end())
+ return It->second;
+ }
return OpTE->ReorderIndices;
}();
// First consider the order of the external scalar users.
@@ -7140,51 +7249,6 @@ InstructionCost BoUpSLP::getSpillCost() const {
return Cost;
}
-/// Check if two insertelement instructions are from the same buildvector.
-static bool areTwoInsertFromSameBuildVector(
- InsertElementInst *VU, InsertElementInst *V,
- function_ref<Value *(InsertElementInst *)> GetBaseOperand) {
- // Instructions must be from the same basic blocks.
- if (VU->getParent() != V->getParent())
- return false;
- // Checks if 2 insertelements are from the same buildvector.
- if (VU->getType() != V->getType())
- return false;
- // Multiple used inserts are separate nodes.
- if (!VU->hasOneUse() && !V->hasOneUse())
- return false;
- auto *IE1 = VU;
- auto *IE2 = V;
- Optional<unsigned> Idx1 = getInsertIndex(IE1);
- Optional<unsigned> Idx2 = getInsertIndex(IE2);
- if (Idx1 == None || Idx2 == None)
- return false;
- // Go through the vector operand of insertelement instructions trying to find
- // either VU as the original vector for IE2 or V as the original vector for
- // IE1.
- do {
- if (IE2 == VU)
- return VU->hasOneUse();
- if (IE1 == V)
- return V->hasOneUse();
- if (IE1) {
- if ((IE1 != VU && !IE1->hasOneUse()) ||
- getInsertIndex(IE1).value_or(*Idx2) == *Idx2)
- IE1 = nullptr;
- else
- IE1 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE1));
- }
- if (IE2) {
- if ((IE2 != V && !IE2->hasOneUse()) ||
- getInsertIndex(IE2).value_or(*Idx1) == *Idx1)
- IE2 = nullptr;
- else
- IE2 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE2));
- }
- } while (IE1 || IE2);
- return false;
-}
-
/// Checks if the \p IE1 instructions is followed by \p IE2 instruction in the
/// buildvector sequence.
static bool isFirstInsertElement(const InsertElementInst *IE1,
diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll
index 5dff4be7493f9..f870fb3a9bc1f 100644
--- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll
@@ -63,8 +63,8 @@ define <4 x half> @phis_reverse(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) {
; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x half> [[IN1]], i64 1
; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x half> [[IN1]], i64 2
; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x half> [[IN1]], i64 3
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x half> poison, half [[A1]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> [[TMP0]], half [[A0]], i32 1
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x half> poison, half [[A0]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> [[TMP0]], half [[A1]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x half> poison, half [[A2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x half> [[TMP2]], half [[A3]], i32 1
; CHECK-NEXT: br i1 [[CMP:%.*]], label [[BB1:%.*]], label [[BB0:%.*]]
@@ -73,15 +73,15 @@ define <4 x half> @phis_reverse(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) {
; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x half> [[IN2]], i64 1
; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x half> [[IN2]], i64 2
; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x half> [[IN2]], i64 3
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x half> poison, half [[B1]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x half> [[TMP4]], half [[B0]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x half> poison, half [[B0]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x half> [[TMP4]], half [[B1]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x half> poison, half [[B2]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x half> [[TMP6]], half [[B3]], i32 1
; CHECK-NEXT: br label [[BB1:%.*]]
; CHECK: bb1:
; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x half> [ [[TMP1]], %entry ], [ [[TMP5]], %bb0 ]
; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x half> [ [[TMP3]], %entry ], [ [[TMP7]], %bb0 ]
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x half> [[TMP8]], <2 x half> poison, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x half> [[TMP8]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x half> [[TMP9]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x half> [[TMP10]], <4 x half> [[TMP11]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x half> [[TMP12]]
More information about the llvm-commits
mailing list