[llvm] 774fc63 - [LV] Consider minimum vscale assumption for RT check cost.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 5 01:42:45 PDT 2022
Author: Florian Hahn
Date: 2022-07-05T09:41:58+01:00
New Revision: 774fc63490939c1afdc02f20ec48467991d3c548
URL: https://github.com/llvm/llvm-project/commit/774fc63490939c1afdc02f20ec48467991d3c548
DIFF: https://github.com/llvm/llvm-project/commit/774fc63490939c1afdc02f20ec48467991d3c548.diff
LOG: [LV] Consider minimum vscale assumption for RT check cost.
For scalable VFs, the minimum assumed vscale needs to be included in the
cost-computation, otherwise a smaller VF may be used for RT check cost
computation than was used for earlier cost computations.
Fixes a RISCV test failing with UBSan due to both scalar and vector
loops having the same cost.
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index cdf9d4d96e94..03f1d9ec5b37 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1559,14 +1559,14 @@ class LoopVectorizationCostModel {
Scalars.clear();
}
-private:
- unsigned NumPredStores = 0;
-
/// Convenience function that returns the value of vscale_range iff
/// vscale_range.min == vscale_range.max or otherwise returns the value
/// returned by the corresponding TLI method.
Optional<unsigned> getVScaleForTuning() const;
+private:
+ unsigned NumPredStores = 0;
+
/// \return An upper bound for the vectorization factors for both
/// fixed and scalable vectorization, where the minimum-known number of
/// elements is a power-of-2 larger than zero. If scalable vectorization is
@@ -10243,7 +10243,8 @@ static void checkMixedPrecision(Loop *L, OptimizationRemarkEmitter *ORE) {
}
static bool areRuntimeChecksProfitable(GeneratedRTChecks &Checks,
- VectorizationFactor &VF, Loop *L,
+ VectorizationFactor &VF,
+ Optional<unsigned> VScale, Loop *L,
ScalarEvolution &SE) {
InstructionCost CheckCost = Checks.getCost();
if (!CheckCost.isValid())
@@ -10295,6 +10296,12 @@ static bool areRuntimeChecksProfitable(GeneratedRTChecks &Checks,
// the computations are performed on doubles, not integers and the result
// is rounded up, hence we get an upper estimate of the TC.
unsigned IntVF = VF.Width.getKnownMinValue();
+ if (VF.Width.isScalable()) {
+ unsigned AssumedMinimumVscale = 1;
+ if (VScale)
+ AssumedMinimumVscale = *VScale;
+ IntVF *= AssumedMinimumVscale;
+ }
double VecCOverVF = double(*VF.Cost.getValue()) / IntVF;
double RtC = *CheckCost.getValue();
double MinTC1 = RtC / (ScalarC - VecCOverVF);
@@ -10308,6 +10315,7 @@ static bool areRuntimeChecksProfitable(GeneratedRTChecks &Checks,
// RtC < ScalarC * TC * (1 / X) ==> RtC * X / ScalarC < TC
double MinTC2 = RtC * 10 / ScalarC;
+ dbgs() << ScalarC << " " << RtC << " " << VecCOverVF << "\n";
// Now pick the larger minimum. If it is not a multiple of VF, choose the
// next closest multiple of VF. This should partly compensate for ignoring
// the epilogue cost.
@@ -10520,7 +10528,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
bool ForceVectorization =
Hints.getForce() == LoopVectorizeHints::FK_Enabled;
if (!ForceVectorization &&
- !areRuntimeChecksProfitable(Checks, VF, L, *PSE.getSE())) {
+ !areRuntimeChecksProfitable(Checks, VF, CM.getVScaleForTuning(), L,
+ *PSE.getSE())) {
ORE->emit([&]() {
return OptimizationRemarkAnalysisAliasing(
DEBUG_TYPE, "CantReorderMemOps", L->getStartLoc(),
More information about the llvm-commits
mailing list