[llvm] [LV] Split checking if tail-folding is possible, collecting masked ops. (PR #77612)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 8 01:33:35 PDT 2024
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/77612
>From 2aaf792f911c6eff3112544be1f101a4fa3ae85e Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 10 Jan 2024 14:17:28 +0000
Subject: [PATCH] [LV] Split checking if tail-folding is possible, collecting
masked ops.
Introduce a new canFoldTailByMasking helper which only checks if
tail-folding is possible, without modifying MaskedOp.
Just because tail-folding is possible doesn't mean the tail will be
folded; that's up to the cost-model to decide. Separating the check for
whether tail-folding is possible from preparing for tail-folding makes sure
that MaskedOp is only populated when tail-folding is actually selected.
This allows only creating the header mask if needed after
https://github.com/llvm/llvm-project/pull/76635.
---
.../Vectorize/LoopVectorizationLegality.h | 9 ++++++---
.../Vectorize/LoopVectorizationLegality.cpp | 20 +++++++++++++++++--
.../Transforms/Vectorize/LoopVectorize.cpp | 5 ++++-
3 files changed, 28 insertions(+), 6 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index a509ebf6a7e1b..af23813da569b 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -276,9 +276,12 @@ class LoopVectorizationLegality {
bool canVectorizeFPMath(bool EnableStrictReductions);
/// Return true if we can vectorize this loop while folding its tail by
- /// masking, and mark all respective loads/stores for masking.
- /// This object's state is only modified iff this function returns true.
- bool prepareToFoldTailByMasking();
+ /// masking.
+ bool canFoldTailByMasking() const;
+
+ /// Mark all respective loads/stores for masking. Must only be called when
+ /// tail-folding is possible.
+ void prepareToFoldTailByMasking();
/// Returns the primary induction variable.
PHINode *getPrimaryInduction() { return PrimaryInduction; }
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index d306524ae5113..c99b3f63c6921 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1543,7 +1543,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
return Result;
}
-bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
+bool LoopVectorizationLegality::canFoldTailByMasking() const {
LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
@@ -1601,8 +1601,24 @@ bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
- MaskedOp.insert(TmpMaskedOp.begin(), TmpMaskedOp.end());
return true;
}
+/// Mark the loads/stores of every block in the loop for masking by passing
+/// MaskedOp directly to blockCanBePredicated(). Must only be called once
+/// tail-folding is known to be possible (see canFoldTailByMasking()), so a
+/// block that cannot be predicated is treated as a programming error.
+void LoopVectorizationLegality::prepareToFoldTailByMasking() {
+ // The list of pointers that we can safely read and write to remains empty.
+ SmallPtrSet<Value *, 8> SafePointers;
+
+ // Mark all blocks for predication, including those that ordinarily do not
+ // need predication such as the header block. No temporary set is needed:
+ // every block is expected to be predicable at this point, so MaskedOp is
+ // populated in place.
+ for (BasicBlock *BB : TheLoop->blocks()) {
+ bool R = blockCanBePredicated(BB, SafePointers, MaskedOp);
+ // R is only read by the assert below; the cast keeps builds without
+ // assertions free of unused-variable warnings.
+ (void)R;
+ assert(R && "Must be able to predicate block when tail-folding.");
+ }
+}
+
} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 417c577e3c5dc..208246f1a887e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1502,7 +1502,7 @@ class LoopVectorizationCostModel {
/// \param UserIC User specific interleave count.
void setTailFoldingStyles(bool IsScalableVF, unsigned UserIC) {
assert(!ChosenTailFoldingStyle && "Tail folding must not be selected yet.");
- if (!Legal->prepareToFoldTailByMasking()) {
+ if (!Legal->canFoldTailByMasking()) {
ChosenTailFoldingStyle =
std::make_pair(TailFoldingStyle::None, TailFoldingStyle::None);
return;
@@ -7226,6 +7226,9 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
CM.invalidateCostModelingDecisions();
}
+ if (CM.foldTailByMasking())
+ Legal->prepareToFoldTailByMasking();
+
ElementCount MaxUserVF =
UserVF.isScalable() ? MaxFactors.ScalableVF : MaxFactors.FixedVF;
bool UserVFIsLegal = ElementCount::isKnownLE(UserVF, MaxUserVF);
More information about the llvm-commits
mailing list