[llvm] 89b67a6 - [SLP] Cluster SortedBases before sorting. (#101144)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 30 14:12:23 PDT 2024
Author: David Green
Date: 2024-07-30T22:12:20+01:00
New Revision: 89b67a640063189bbd727ade4c912f6e48fee5a0
URL: https://github.com/llvm/llvm-project/commit/89b67a640063189bbd727ade4c912f6e48fee5a0
DIFF: https://github.com/llvm/llvm-project/commit/89b67a640063189bbd727ade4c912f6e48fee5a0.diff
LOG: [SLP] Cluster SortedBases before sorting. (#101144)
In order to enforce a strict weak ordering, this patch clusters the bases
being sorted according to their root - the first value in a gep chain. The
sorting is then performed within each cluster.
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cf87b8621027f..60c20aa2ebb5c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4843,25 +4843,43 @@ static bool clusterSortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
return false;
// If we have a better order, also sort the base pointers by increasing
- // (variable) values if possible, to try and keep the order more regular.
- SmallVector<std::pair<Value *, Value *>> SortedBases;
- for (auto &Base : Bases)
- SortedBases.emplace_back(Base.first,
- Base.first->stripInBoundsConstantOffsets());
- llvm::stable_sort(SortedBases, [](std::pair<Value *, Value *> V1,
- std::pair<Value *, Value *> V2) {
- const Value *V = V2.second;
- while (auto *Gep = dyn_cast<GetElementPtrInst>(V)) {
- if (Gep->getOperand(0) == V1.second)
- return true;
- V = Gep->getOperand(0);
- }
- return false;
- });
+ // (variable) values if possible, to try and keep the order more regular. In
+ // order to create a valid strict-weak order we cluster by the Root of gep
+ // chains and sort within each.
+ SmallVector<std::tuple<Value *, Value *, Value *>> SortedBases;
+ for (auto &Base : Bases) {
+ Value *Strip = Base.first->stripInBoundsConstantOffsets();
+ Value *Root = Strip;
+ while (auto *Gep = dyn_cast<GetElementPtrInst>(Root))
+ Root = Gep->getOperand(0);
+ SortedBases.emplace_back(Base.first, Strip, Root);
+ }
+ auto *Begin = SortedBases.begin();
+ auto *End = SortedBases.end();
+ while (Begin != End) {
+ Value *Root = std::get<2>(*Begin);
+ auto *Mid = std::stable_partition(
+ Begin, End, [&Root](auto V) { return std::get<2>(V) == Root; });
+ DenseMap<Value *, DenseMap<Value *, bool>> LessThan;
+ for (auto I = Begin; I < Mid; ++I)
+ LessThan.try_emplace(std::get<1>(*I));
+ for (auto I = Begin; I < Mid; ++I) {
+ Value *V = std::get<1>(*I);
+ while (auto *Gep = dyn_cast<GetElementPtrInst>(V)) {
+ V = Gep->getOperand(0);
+ if (LessThan.contains(V))
+ LessThan[V][std::get<1>(*I)] = true;
+ }
+ }
+ std::stable_sort(Begin, Mid, [&LessThan](auto &V1, auto &V2) {
+ return LessThan[std::get<1>(V1)][std::get<1>(V2)];
+ });
+ Begin = Mid;
+ }
// Collect the final order of sorted indices
for (auto Base : SortedBases)
- for (auto &T : Bases[Base.first])
+ for (auto &T : Bases[std::get<0>(Base)])
SortedIndices.push_back(std::get<2>(T));
assert(SortedIndices.size() == VL.size() &&
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
index 6b5503f26fabf..d79aed89b0be7 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
@@ -428,14 +428,14 @@ define i32 @reduce_blockstrided4x4(ptr nocapture noundef readonly %p1, i32 nound
; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i8>, ptr [[ADD_PTR64]], align 1
; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> [[TMP1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <16 x i8> [[TMP8]], <16 x i8> [[TMP9]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x i8> [[TMP10]], <16 x i8> [[TMP11]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; CHECK-NEXT: [[TMP13:%.*]] = zext <16 x i8> [[TMP12]] to <16 x i32>
-; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i8> [[TMP6]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <16 x i8> [[TMP14]], <16 x i8> [[TMP15]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x i8> [[TMP7]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <16 x i8> [[TMP16]], <16 x i8> [[TMP17]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
More information about the llvm-commits mailing list