[llvm] [LoopUnroll] Enhance the use of Optimization Remarks and LLVM_DEBUG (PR #178476)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 28 10:51:57 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Justin Fargnoli (justinfargnoli)
<details>
<summary>Changes</summary>
Add/refine uses of optimization remarks (emitted via the OptimizationRemarkEmitter, ORE) and LLVM_DEBUG to aid in debugging the unroller's heuristics. In particular:
- Ensure we always use OptimizationRemarkMissed when we fail to perform unrolling that the user requested.
- Use generic terminology (e.g., "full unroll pragma" instead of "unroll(full)") in diagnostic messages to avoid tying LLVM to clang's pragma unrolling syntax.
- Use indentation in LLVM_DEBUG output to reflect the depth of the call stack, making it easier to follow the unroller's logic when reading debug output.
---
Patch is 109.33 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/178476.diff
8 Files Affected:
- (modified) llvm/include/llvm/Transforms/Utils/UnrollLoop.h (+4-2)
- (modified) llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp (+332-83)
- (modified) llvm/lib/Transforms/Utils/LoopUnroll.cpp (+11-6)
- (added) llvm/test/Transforms/LoopUnroll/debug-and-remarks.ll (+1528)
- (modified) llvm/test/Transforms/LoopUnroll/full-unroll-avoid-partial.ll (+15-15)
- (modified) llvm/test/Transforms/LoopUnroll/gh-issue77118-broken-lcssa-form.ll (+4-4)
- (modified) llvm/test/Transforms/LoopUnroll/guard-cost-for-unrolling.ll (+2-2)
- (modified) llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis-iv.ll (+5-5)
``````````diff
diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
index a3efc43c62dc3..cdce8f5bf21dd 100644
--- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -145,8 +145,10 @@ class UnrollCostEstimator {
const SmallPtrSetImpl<const Value *> &EphValues,
unsigned BEInsns);
- /// Whether it is legal to unroll this loop.
- LLVM_ABI bool canUnroll() const;
+ /// Whether it is legal to unroll this loop. If \p ORE and \p L are provided,
+ /// emit an optimization remark on failure.
+ LLVM_ABI bool canUnroll(OptimizationRemarkEmitter *ORE = nullptr,
+ const Loop *L = nullptr) const;
uint64_t getRolledLoopSize() const { return LoopSize.getValue(); }
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 802ae4e9c28e3..00a336013d852 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -142,8 +142,8 @@ static cl::opt<unsigned> UnrollMaxUpperBound(
static cl::opt<unsigned> PragmaUnrollThreshold(
"pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
- cl::desc("Unrolled size limit for loops with an unroll(full) or "
- "unroll_count pragma."));
+ cl::desc("Unrolled size limit for loops with unroll metadata "
+ "(full, enable, or count)."));
static cl::opt<unsigned> FlatLoopTripCountThreshold(
"flat-loop-tripcount-threshold", cl::init(5), cl::Hidden,
@@ -368,12 +368,18 @@ static std::optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
// Only analyze inner loops. We can't properly estimate cost of nested loops
// and we won't visit inner loops again anyway.
- if (!L->isInnermost())
+ if (!L->isInnermost()) {
+ LLVM_DEBUG(
+ dbgs() << " Not analyzing loop cost: not an innermost loop.\n");
return std::nullopt;
+ }
// Don't simulate loops with a big or unknown tripcount
- if (!TripCount || TripCount > MaxIterationsCountToAnalyze)
+ if (!TripCount || TripCount > MaxIterationsCountToAnalyze) {
+ LLVM_DEBUG(dbgs() << " Not analyzing loop cost: trip count "
+ << (TripCount ? "too large" : "unknown") << ".\n");
return std::nullopt;
+ }
SmallSetVector<BasicBlock *, 16> BBWorklist;
SmallSetVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitWorklist;
@@ -463,7 +469,7 @@ static std::optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
return Op;
});
UnrolledCost += TTI.getInstructionCost(I, Operands, CostKind);
- LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration "
+ LLVM_DEBUG(dbgs() << " Adding cost of instruction (iteration "
<< Iteration << "): ");
LLVM_DEBUG(I->dump());
}
@@ -500,7 +506,7 @@ static std::optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
assert(L->isLCSSAForm(DT) &&
"Must have loops in LCSSA form to track live-out values.");
- LLVM_DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");
+ LLVM_DEBUG(dbgs() << " Starting LoopUnroll profitability analysis...\n");
TargetTransformInfo::TargetCostKind CostKind =
L->getHeader()->getParent()->hasMinSize() ?
@@ -510,7 +516,7 @@ static std::optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
// Since the same load will take different values on different iterations,
// we literally have to go through all loop's iterations.
for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
- LLVM_DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");
+ LLVM_DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");
// Prepare for the iteration by collecting any simplified entry or backedge
// inputs.
@@ -576,7 +582,7 @@ static std::optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
if (auto *CI = dyn_cast<CallInst>(&I)) {
const Function *Callee = CI->getCalledFunction();
if (!Callee || TTI.isLoweredToCall(Callee)) {
- LLVM_DEBUG(dbgs() << "Can't analyze cost of loop with call\n");
+ LLVM_DEBUG(dbgs() << " Can't analyze cost of loop with call\n");
return std::nullopt;
}
}
@@ -588,8 +594,8 @@ static std::optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
// If unrolled body turns out to be too big, bail out.
if (UnrolledCost > MaxUnrolledLoopSize) {
- LLVM_DEBUG(dbgs() << " Exceeded threshold.. exiting.\n"
- << " UnrolledCost: " << UnrolledCost
+ LLVM_DEBUG(dbgs() << " Exceeded threshold.. exiting.\n"
+ << " UnrolledCost: " << UnrolledCost
<< ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
<< "\n");
return std::nullopt;
@@ -648,8 +654,8 @@ static std::optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
// If we found no optimization opportunities on the first iteration, we
// won't find them on later ones too.
if (UnrolledCost == RolledDynamicCost) {
- LLVM_DEBUG(dbgs() << " No opportunities found.. exiting.\n"
- << " UnrolledCost: " << UnrolledCost << "\n");
+ LLVM_DEBUG(dbgs() << " No opportunities found.. exiting.\n"
+ << " UnrolledCost: " << UnrolledCost << "\n");
return std::nullopt;
}
}
@@ -674,9 +680,9 @@ static std::optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
"All instructions must have a valid cost, whether the "
"loop is rolled or unrolled.");
- LLVM_DEBUG(dbgs() << "Analysis finished:\n"
- << "UnrolledCost: " << UnrolledCost << ", "
- << "RolledDynamicCost: " << RolledDynamicCost << "\n");
+ LLVM_DEBUG(dbgs() << " Analysis finished:\n"
+ << " UnrolledCost: " << UnrolledCost << ", "
+ << " RolledDynamicCost: " << RolledDynamicCost << "\n");
return {{unsigned(UnrolledCost.getValue()),
unsigned(RolledDynamicCost.getValue())}};
}
@@ -707,20 +713,29 @@ UnrollCostEstimator::UnrollCostEstimator(
LoopSize = BEInsns + 1;
}
-bool UnrollCostEstimator::canUnroll() const {
- switch (Convergence) {
- case ConvergenceKind::ExtendedLoop:
- LLVM_DEBUG(dbgs() << " Convergence prevents unrolling.\n");
+bool UnrollCostEstimator::canUnroll(OptimizationRemarkEmitter *ORE,
+ const Loop *L) const {
+ // Helper to emit debug output and a missed optimization remark.
+ auto ReportCannotUnroll = [&](StringRef Reason) {
+ LLVM_DEBUG(dbgs() << " Not unrolling: " << Reason << ".\n");
+ if (ORE && L)
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "CannotUnrollLoop",
+ L->getStartLoc(), L->getHeader())
+ << "unable to unroll loop: " << Reason;
+ });
+ };
+
+ if (Convergence == ConvergenceKind::ExtendedLoop) {
+ ReportCannotUnroll("contains convergent operations");
return false;
- default:
- break;
}
if (!LoopSize.isValid()) {
- LLVM_DEBUG(dbgs() << " Invalid loop size prevents unrolling.\n");
+ ReportCannotUnroll("loop size could not be computed");
return false;
}
if (NotDuplicatable) {
- LLVM_DEBUG(dbgs() << " Non-duplicatable blocks prevent unrolling.\n");
+ ReportCannotUnroll("contains non-duplicatable instructions");
return false;
}
return true;
@@ -798,40 +813,83 @@ static std::optional<unsigned>
shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo,
const unsigned TripMultiple, const unsigned TripCount,
unsigned MaxTripCount, const UnrollCostEstimator UCE,
- const TargetTransformInfo::UnrollingPreferences &UP) {
+ const TargetTransformInfo::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) {
// Using unroll pragma
// 1st priority is unroll count set by "unroll-count" option.
if (PInfo.UserUnrollCount) {
if (UP.AllowRemainder &&
- UCE.getUnrolledLoopSize(UP, (unsigned)UnrollCount) < UP.Threshold)
+ UCE.getUnrolledLoopSize(UP, (unsigned)UnrollCount) < UP.Threshold) {
+ LLVM_DEBUG(dbgs() << " Unrolling with user-specified count: "
+ << UnrollCount << "\n");
return (unsigned)UnrollCount;
+ }
+ LLVM_DEBUG(dbgs() << " Not unrolling with user count " << UnrollCount
+ << ": "
+ << (!UP.AllowRemainder ? "remainder not allowed"
+ : "exceeds threshold")
+ << ".\n");
}
// 2nd priority is unroll count set by pragma.
if (PInfo.PragmaCount > 0) {
- if ((UP.AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0)))
+ if ((UP.AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0))) {
+ LLVM_DEBUG(dbgs() << " Unrolling with pragma count: "
+ << PInfo.PragmaCount << "\n");
return PInfo.PragmaCount;
+ }
+ LLVM_DEBUG(dbgs() << " Not unrolling with pragma count "
+ << PInfo.PragmaCount
+ << ": remainder not allowed, count does not divide trip "
+ << "multiple " << TripMultiple << ".\n");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "PragmaUnrollCountRejected",
+ L->getStartLoc(), L->getHeader())
+ << "unable to unroll loop with count "
+ << ore::NV("PragmaCount", PInfo.PragmaCount)
+ << ": remainder loop is restricted and count does not divide "
+ "trip multiple "
+ << ore::NV("TripMultiple", TripMultiple);
+ });
}
- if (PInfo.PragmaFullUnroll && TripCount != 0) {
- // Certain cases with UBSAN can cause trip count to be calculated as
- // INT_MAX, Block full unrolling at a reasonable limit so that the compiler
- // doesn't hang trying to unroll the loop. See PR77842
- if (TripCount > PragmaUnrollFullMaxIterations) {
- LLVM_DEBUG(dbgs() << "Won't unroll; trip count is too large\n");
- return std::nullopt;
- }
+ if (PInfo.PragmaFullUnroll) {
+ if (TripCount != 0) {
+ // Certain cases with UBSAN can cause trip count to be calculated as
+ // INT_MAX, Block full unrolling at a reasonable limit so that the
+ // compiler doesn't hang trying to unroll the loop. See PR77842
+ if (TripCount > PragmaUnrollFullMaxIterations) {
+ LLVM_DEBUG(dbgs() << " Won't unroll; trip count is too large.\n");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE,
+ "PragmaFullUnrollTripCountTooLarge",
+ L->getStartLoc(), L->getHeader())
+ << "unable to fully unroll loop: trip count "
+ << ore::NV("TripCount", TripCount) << " exceeds limit "
+ << ore::NV("Limit", PragmaUnrollFullMaxIterations);
+ });
+ return std::nullopt;
+ }
- return TripCount;
+ LLVM_DEBUG(dbgs() << " Fully unrolling with trip count: " << TripCount
+ << "\n");
+ return TripCount;
+ }
+ // Note: ORE for unknown trip count is emitted later in computeUnrollCount
+ // after we've exhausted all strategies.
+ LLVM_DEBUG(dbgs() << " Not fully unrolling: unknown trip count.\n");
}
if (PInfo.PragmaEnableUnroll && !TripCount && MaxTripCount &&
- MaxTripCount <= UP.MaxUpperBound)
+ MaxTripCount <= UP.MaxUpperBound) {
+ LLVM_DEBUG(dbgs() << " Unrolling with max trip count: " << MaxTripCount
+ << "\n");
return MaxTripCount;
+ }
- // if didn't return until here, should continue to other priorties
+ // if didn't return until here, should continue to other priorities
return std::nullopt;
}
@@ -839,16 +897,43 @@ static std::optional<unsigned> shouldFullUnroll(
Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT,
ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE,
- const TargetTransformInfo::UnrollingPreferences &UP) {
+ const TargetTransformInfo::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE, bool ExplicitUnroll,
+ bool IsUpperBound = false) {
assert(FullUnrollTripCount && "should be non-zero!");
- if (FullUnrollTripCount > UP.FullUnrollMaxCount)
+ const char *UnrollKind =
+ IsUpperBound ? "upper-bound unrolling" : "full unroll";
+
+ if (FullUnrollTripCount > UP.FullUnrollMaxCount) {
+ LLVM_DEBUG(dbgs() << " Not " << UnrollKind << ": trip count "
+ << FullUnrollTripCount << " exceeds max count "
+ << UP.FullUnrollMaxCount << ".\n");
+ if (ExplicitUnroll)
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE,
+ "FullUnrollTripCountTooLarge",
+ L->getStartLoc(), L->getHeader())
+ << "unable to fully unroll loop: trip count "
+ << ore::NV("TripCount", FullUnrollTripCount)
+ << " exceeds maximum full unroll count "
+ << ore::NV("MaxFullUnrollCount", UP.FullUnrollMaxCount);
+ });
return std::nullopt;
+ }
// When computing the unrolled size, note that BEInsns are not replicated
// like the rest of the loop body.
- if (UCE.getUnrolledLoopSize(UP) < UP.Threshold)
+ uint64_t UnrolledSize = UCE.getUnrolledLoopSize(UP);
+ if (UnrolledSize < UP.Threshold) {
+ LLVM_DEBUG(dbgs() << " " << UnrollKind << ": size " << UnrolledSize
+ << " < threshold " << UP.Threshold << ".\n");
return FullUnrollTripCount;
+ }
+
+ LLVM_DEBUG(dbgs() << " Unrolled size " << UnrolledSize
+ << " exceeds threshold " << UP.Threshold
+ << "; checking for cost benefit.\n");
// The loop isn't that small, but we still can fully unroll it if that
// helps to remove a significant number of instructions.
@@ -859,8 +944,35 @@ static std::optional<unsigned> shouldFullUnroll(
UP.MaxIterationsCountToAnalyze)) {
unsigned Boost =
getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);
- if (Cost->UnrolledCost < UP.Threshold * Boost / 100)
+ unsigned BoostedThreshold = UP.Threshold * Boost / 100;
+ if (Cost->UnrolledCost < BoostedThreshold) {
+ LLVM_DEBUG(dbgs() << " " << UnrollKind
+ << " profitable after cost analysis.\n");
return FullUnrollTripCount;
+ }
+ LLVM_DEBUG(dbgs() << " Not " << UnrollKind << ": cost "
+ << Cost->UnrolledCost << " >= boosted threshold "
+ << BoostedThreshold << ".\n");
+ if (ExplicitUnroll)
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "FullUnrollNotProfitable",
+ L->getStartLoc(), L->getHeader())
+ << "unable to fully unroll loop: estimated unrolled cost "
+ << ore::NV("UnrolledCost", Cost->UnrolledCost)
+ << " exceeds boosted threshold "
+ << ore::NV("BoostedThreshold", BoostedThreshold);
+ });
+ } else {
+ LLVM_DEBUG(dbgs() << " Skipping " << UnrollKind
+ << ": cost analysis unavailable.\n");
+ if (ExplicitUnroll)
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "FullUnrollSizeTooLarge",
+ L->getStartLoc(), L->getHeader())
+ << "unable to fully unroll loop: estimated unrolled size "
+ << ore::NV("UnrolledSize", UnrolledSize) << " exceeds threshold "
+ << ore::NV("Threshold", UP.Threshold);
+ });
}
return std::nullopt;
}
@@ -874,8 +986,8 @@ shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount,
return std::nullopt;
if (!UP.Partial) {
- LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
- << "-unroll-allow-partial not given\n");
+ LLVM_DEBUG(dbgs() << " Will not try to unroll partially because "
+ << "-unroll-allow-partial not given.\n");
return 0;
}
unsigned count = UP.Count;
@@ -883,9 +995,14 @@ shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount,
count = TripCount;
if (UP.PartialThreshold != NoThreshold) {
// Reduce unroll count to be modulo of TripCount for partial unrolling.
- if (UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold)
- count = (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
- (LoopSize - UP.BEInsns);
+ if (UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold) {
+ unsigned NewCount =
+ (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
+ (LoopSize - UP.BEInsns);
+ LLVM_DEBUG(dbgs() << " Unrolled size exceeds threshold; reducing count "
+ << "from " << count << " to " << NewCount << ".\n");
+ count = NewCount;
+ }
if (count > UP.MaxCount)
count = UP.MaxCount;
while (count != 0 && TripCount % count != 0)
@@ -895,12 +1012,16 @@ shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount,
// largest power-of-two factor that satisfies the threshold limit.
// As we'll create fixup loop, do the type of unrolling only if
// remainder loop is allowed.
+ // Note: DefaultUnrollRuntimeCount is used as a reasonable starting point
+ // even though this is partial unrolling (not runtime unrolling).
count = UP.DefaultUnrollRuntimeCount;
while (count != 0 &&
UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold)
count >>= 1;
}
if (count < 2) {
+ LLVM_DEBUG(
+ dbgs() << " Will not partially unroll: no profitable count.\n");
count = 0;
}
} else {
@@ -909,14 +1030,14 @@ shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount,
if (count > UP.MaxCount)
count = UP.MaxCount;
- LLVM_DEBUG(dbgs() << " partially unrolling with count: " << count << "\n");
+ LLVM_DEBUG(dbgs() << " Partially unrolling with count: " << count << "\n");
return count;
}
// Returns true if unroll count was set explicitly.
// Calculates unroll count and writes it to UP.Count.
// Unless IgnoreUser is true, will also use metadata and command-line options
-// that are specific to to the LoopUnroll pass (which, for instance, are
+// that are specific to the LoopUnroll pass (which, for instance, are
// irrelevant for the LoopUnrollAndJam pass).
// FIXME: This function is used by LoopUnroll and LoopUnrollAndJam, but consumes
// many LoopUnroll-specific options. The shared functionality should be
@@ -932,6 +1053,11 @@ bool llvm::computeUnrollCount(
unsigned LoopSize = UCE.getRolledLoopSize();
+ LLVM_DEBUG(dbgs() << " Computing unroll count: TripCount=" << TripCount
+ << ", MaxTripCount=" << MaxTripCount
+ << (MaxOrZero ? " (MaxOrZero)" : "")
+ << ", TripMultiple=" << TripMultiple << "\n");
+
const bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
const bool PragmaFullUnroll = hasUnrollFullPragma(L);
const unsigned PragmaCount = unrollCountPragmaValue(L);
@@ -940,6 +1066,15 @@ bool llvm::computeUnrollCount(
const bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
PragmaEnableUnroll || UserUnrollCount;
+ LLVM_DEBUG(if (ExplicitUnroll) dbgs()
+ << " Explicit unroll requested: "
+ << (UserUnrollCount ? "user-count " : "")
+ << (PragmaFullUnroll ? "pragma-full " : "")
+ << (PragmaCount > 0
+ ? ("pragma-count(" + Twine(PragmaCount) + ") ").str()
+ : "")
+ << (PragmaEnableUnroll ? "pragma-enable" : "") << "\n");
+
PragmaInfo PInfo(User...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/178476
More information about the llvm-commits mailing list