[llvm-branch-commits] [llvm] 9cd1cd6 - Fix
Florian Hahn via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sat Sep 25 12:29:39 PDT 2021
Author: Florian Hahn
Date: 2021-09-25T15:15:54+01:00
New Revision: 9cd1cd6629a80374618b6a5adff68addfa779bd5
URL: https://github.com/llvm/llvm-project/commit/9cd1cd6629a80374618b6a5adff68addfa779bd5
DIFF: https://github.com/llvm/llvm-project/commit/9cd1cd6629a80374618b6a5adff68addfa779bd5.diff
LOG: Fix
Differential Revision: https://reviews.llvm.org/D75981
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 870e7175c9221..c3a440ecc9b85 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -26,6 +26,8 @@
#include "VPlan.h"
+class GeneratedRTChecks;
+
namespace llvm {
class LoopInfo;
@@ -183,12 +185,16 @@ struct VectorizationFactor {
/// Cost of the loop with that width.
InstructionCost Cost;
- VectorizationFactor(ElementCount Width, InstructionCost Cost)
- : Width(Width), Cost(Cost) {}
+ /// Cost of the scalar loop.
+ InstructionCost ScalarCost;
+
+ VectorizationFactor(ElementCount Width, InstructionCost Cost,
+ InstructionCost ScalarCost)
+ : Width(Width), Cost(Cost), ScalarCost(ScalarCost) {}
/// Width 1 means no vectorization, cost 0 means uncomputed cost.
static VectorizationFactor Disabled() {
- return {ElementCount::getFixed(1), 0};
+ return {ElementCount::getFixed(1), 0, 0};
}
bool operator==(const VectorizationFactor &rhs) const {
@@ -289,7 +295,8 @@ class LoopVectorizationPlanner {
/// Plan how to best vectorize, return the best VF and its cost, or None if
/// vectorization and interleaving should be avoided up front.
- Optional<VectorizationFactor> plan(ElementCount UserVF, unsigned UserIC);
+ Optional<VectorizationFactor> plan(ElementCount UserVF, unsigned UserIC,
+ GeneratedRTChecks &Checks);
/// Use the VPlan-native path to plan how to best vectorize, return the best
/// VF and its cost.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e7a79ae69d2b8..8db7ecc7cd7ef 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -423,7 +423,6 @@ static Optional<unsigned> getSmallBestKnownTC(ScalarEvolution &SE, Loop *L) {
return None;
}
-// Forward declare GeneratedRTChecks.
class GeneratedRTChecks;
namespace llvm {
@@ -1634,6 +1633,17 @@ class LoopVectorizationCostModel {
Scalars.clear();
}
+ /// The vectorization cost is a combination of the cost itself and a boolean
+ /// indicating whether any of the contributing operations will actually
+ /// operate on vector values after type legalization in the backend. If this
+ /// latter value is false, then all operations will be scalarized (i.e. no
+ /// vectorization has actually taken place).
+ using VectorizationCostTy = std::pair<InstructionCost, bool>;
+
+ /// Returns the execution time cost of an instruction for a given vector
+ /// width. Vector width of one means scalar.
+ VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF);
+
private:
unsigned NumPredStores = 0;
@@ -1662,13 +1672,6 @@ class LoopVectorizationCostModel {
/// of elements.
ElementCount getMaxLegalScalableVF(unsigned MaxSafeElements);
- /// The vectorization cost is a combination of the cost itself and a boolean
- /// indicating whether any of the contributing operations will actually
- /// operate on vector values after type legalization in the backend. If this
- /// latter value is false, then all operations will be scalarized (i.e. no
- /// vectorization has actually taken place).
- using VectorizationCostTy = std::pair<InstructionCost, bool>;
-
/// Returns the expected execution cost. The unit of the cost does
/// not matter because we use the 'cost' units to compare different
/// vector widths. The cost that is returned is *not* normalized by
@@ -1680,10 +1683,6 @@ class LoopVectorizationCostModel {
expectedCost(ElementCount VF,
SmallVectorImpl<InstructionVFPair> *Invalid = nullptr);
- /// Returns the execution time cost of an instruction for a given vector
- /// width. Vector width of one means scalar.
- VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF);
-
/// The cost-computation logic from getInstructionCost which provides
/// the vector type as an output parameter.
InstructionCost getInstructionCost(Instruction *I, ElementCount VF,
@@ -6042,7 +6041,8 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
assert(VFCandidates.count(ElementCount::getFixed(1)) &&
"Expected Scalar VF to be a candidate");
- const VectorizationFactor ScalarCost(ElementCount::getFixed(1), ExpectedCost);
+ const VectorizationFactor ScalarCost(ElementCount::getFixed(1), ExpectedCost,
+ ExpectedCost);
VectorizationFactor ChosenFactor = ScalarCost;
bool ForceVectorization = Hints->getForce() == LoopVectorizeHints::FK_Enabled;
@@ -6060,7 +6060,7 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
continue;
VectorizationCostTy C = expectedCost(i, &InvalidCosts);
- VectorizationFactor Candidate(i, C.first);
+ VectorizationFactor Candidate(i, C.first, ScalarCost.ScalarCost);
LLVM_DEBUG(
dbgs() << "LV: Vector loop of width " << i << " costs: "
<< (Candidate.Cost / Candidate.Width.getKnownMinValue())
@@ -6251,7 +6251,7 @@ LoopVectorizationCostModel::selectEpilogueVectorizationFactor(
LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization factor is forced.\n";);
if (LVP.hasPlanWithVFs(
{MainLoopVF, ElementCount::getFixed(EpilogueVectorizationForceVF)}))
- return {ElementCount::getFixed(EpilogueVectorizationForceVF), 0};
+ return {ElementCount::getFixed(EpilogueVectorizationForceVF), 0, 0};
else {
LLVM_DEBUG(
dbgs()
@@ -8079,7 +8079,7 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) {
if (VPlanBuildStressTest)
return VectorizationFactor::Disabled();
- return {VF, 0 /*Cost*/};
+ return {VF, 0 /*Cost*/, 0 /* ScalarCost */};
}
LLVM_DEBUG(
@@ -8089,7 +8089,8 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) {
}
Optional<VectorizationFactor>
-LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
+LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC,
+ GeneratedRTChecks &Checks) {
assert(OrigLoop->isInnermost() && "Inner loop expected.");
FixedScalableVFPair MaxFactors = CM.computeMaxVF(UserVF, UserIC);
if (!MaxFactors) // Cases that should not to be vectorized nor interleaved.
@@ -8122,7 +8123,8 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
CM.collectInLoopReductions();
buildVPlansWithVPRecipes(UserVF, UserVF);
LLVM_DEBUG(printPlans(dbgs()));
- return {{UserVF, 0}};
+ Checks.Create(OrigLoop, *Legal->getLAI(), PSE.getUnionPredicate());
+ return {{UserVF, 0, 0}};
} else
reportVectorizationInfo("UserVF ignored because of invalid costs.",
"InvalidCost", ORE, OrigLoop);
@@ -8158,6 +8160,9 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
// Select the optimal vectorization factor.
auto SelectedVF = CM.selectVectorizationFactor(VFCandidates);
+ if (!SelectedVF.Width.isScalar())
+ Checks.Create(OrigLoop, *Legal->getLAI(), PSE.getUnionPredicate());
+
// Check if it is profitable to vectorize with runtime checks.
unsigned NumRuntimePointerChecks = Requirements.getNumRuntimePointerChecks();
if (SelectedVF.Width.getKnownMinValue() > 1 && NumRuntimePointerChecks) {
@@ -10276,8 +10281,10 @@ bool LoopVectorizePass::processLoop(Loop *L) {
ElementCount UserVF = Hints.getWidth();
unsigned UserIC = Hints.getInterleave();
+ GeneratedRTChecks Checks(*PSE.getSE(), DT, LI,
+ F->getParent()->getDataLayout());
// Plan how to best vectorize, return the best VF and its cost.
- Optional<VectorizationFactor> MaybeVF = LVP.plan(UserVF, UserIC);
+ Optional<VectorizationFactor> MaybeVF = LVP.plan(UserVF, UserIC, Checks);
VectorizationFactor VF = VectorizationFactor::Disabled();
unsigned IC = 1;
@@ -10373,13 +10380,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
bool DisableRuntimeUnroll = false;
MDNode *OrigLoopID = L->getLoopID();
{
- // Optimistically generate runtime checks. Drop them if they turn out to not
- // be profitable. Limit the scope of Checks, so the cleanup happens
- // immediately after vector codegeneration is done.
- GeneratedRTChecks Checks(*PSE.getSE(), DT, LI,
- F->getParent()->getDataLayout());
- if (!VF.Width.isScalar() || IC > 1)
- Checks.Create(L, *LVL.getLAI(), PSE.getUnionPredicate());
LVP.setBestPlan(VF.Width, IC);
using namespace ore;
More information about the llvm-branch-commits
mailing list