[llvm] Initial commit (PR #184546)
Justin Fargnoli via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 3 21:05:57 PST 2026
https://github.com/justinfargnoli created https://github.com/llvm/llvm-project/pull/184546
None
>From 87680c8e259f47d4f91ef0b12bf4152c963fb7c3 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Wed, 4 Mar 2026 05:05:27 +0000
Subject: [PATCH] Initial commit
---
.../llvm/Analysis/TargetTransformInfo.h | 4 +++
.../Target/NVPTX/NVPTXTargetTransformInfo.cpp | 1 +
llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 35 ++++++++++++++++++-
3 files changed, 39 insertions(+), 1 deletion(-)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 18ae6a005d972..37aa78fe120b6 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -732,6 +732,10 @@ class TargetTransformInfo {
bool RuntimeUnrollMultiExit;
/// Allow unrolling to add parallel reduction phis.
bool AddAdditionalAccumulators;
+ /// Multiply the full-unroll Threshold when the loop contains
+ /// loop-dependent accesses to alloca-backed arrays whose
+ /// elimination would benefit from full unrolling. Default 1 (no boost).
+ unsigned LoopDependentMemoryAccessThresholdMultiplier = 1;
};
/// Get target-customized preferences for the generic loop unrolling
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index c1fe9300785a3..a4239982b3172 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -537,6 +537,7 @@ void NVPTXTTIImpl::getUnrollingPreferences(
// beneficial.
UP.Partial = UP.Runtime = true;
UP.PartialThreshold = UP.Threshold / 4;
+ UP.LoopDependentMemoryAccessThresholdMultiplier = 8;
}
void NVPTXTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 3e2ed34b3c67d..f3947046a2924 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -32,6 +32,7 @@
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
@@ -843,6 +844,35 @@ shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo,
return std::nullopt;
}
+/// Return true if \p L has a load or store whose pointer has an alloca among
+/// its underlying objects and is not loop-invariant in \p L. Full unrolling
+/// can eliminate such allocas once SROA sees only constant-index accesses.
+static bool hasLoopDependentArrayAccess(const Loop *L, ScalarEvolution &SE) {
+  for (BasicBlock *BB : L->blocks()) {
+    for (Instruction &I : *BB) {
+      Value *Ptr = nullptr;
+      if (auto *LI = dyn_cast<LoadInst>(&I))
+        Ptr = LI->getPointerOperand();
+      else if (auto *SI = dyn_cast<StoreInst>(&I))
+        Ptr = SI->getPointerOperand();
+      else
+        continue;
+
+      SmallVector<const Value *, 4> Objects;
+      getUnderlyingObjects(Ptr, Objects, /*LI=*/nullptr, /*MaxLookup=*/10);
+      for (const Value *Obj : Objects) {
+        if (!isa<AllocaInst>(Obj))
+          continue;
+        // Disposition depends only on Ptr; one query covers every alloca.
+        if (!SE.isLoopInvariant(SE.getSCEV(Ptr), L))
+          return true;
+        break;
+      }
+    }
+  }
+  return false;
+}
+
static std::optional<unsigned> shouldFullUnroll(
Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT,
ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
@@ -855,7 +885,10 @@ static std::optional<unsigned> shouldFullUnroll(
// When computing the unrolled size, note that BEInsns are not replicated
// like the rest of the loop body.
- if (UCE.getUnrolledLoopSize(UP) < UP.Threshold)
+ unsigned Threshold = UP.Threshold;
+ if (hasLoopDependentArrayAccess(L, SE))
+ Threshold *= UP.LoopDependentMemoryAccessThresholdMultiplier;
+ if (UCE.getUnrolledLoopSize(UP) < Threshold)
return FullUnrollTripCount;
// The loop isn't that small, but we still can fully unroll it if that
More information about the llvm-commits
mailing list