[llvm] Initial commit (PR #184546)

Justin Fargnoli via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 3 21:05:57 PST 2026


https://github.com/justinfargnoli created https://github.com/llvm/llvm-project/pull/184546

None

>From 87680c8e259f47d4f91ef0b12bf4152c963fb7c3 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Wed, 4 Mar 2026 05:05:27 +0000
Subject: [PATCH] Initial commit

---
 .../llvm/Analysis/TargetTransformInfo.h       |  4 +++
 .../Target/NVPTX/NVPTXTargetTransformInfo.cpp |  1 +
 llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 35 ++++++++++++++++++-
 3 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 18ae6a005d972..37aa78fe120b6 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -732,6 +732,10 @@ class TargetTransformInfo {
     bool RuntimeUnrollMultiExit;
     /// Allow unrolling to add parallel reduction phis.
     bool AddAdditionalAccumulators;
+    /// Multiply the full-unroll Threshold when the loop contains
+    /// loop-dependent accesses to alloca-backed arrays whose
+    /// elimination would benefit from full unrolling. Default 1 (no boost).
+    unsigned LoopDependentMemoryAccessThresholdMultiplier = 1;
   };
 
   /// Get target-customized preferences for the generic loop unrolling
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index c1fe9300785a3..a4239982b3172 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -537,6 +537,7 @@ void NVPTXTTIImpl::getUnrollingPreferences(
   // beneficial.
   UP.Partial = UP.Runtime = true;
   UP.PartialThreshold = UP.Threshold / 4;
+  UP.LoopDependentMemoryAccessThresholdMultiplier = 8;
 }
 
 void NVPTXTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 3e2ed34b3c67d..f3947046a2924 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -32,6 +32,7 @@
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constant.h"
@@ -843,6 +844,35 @@ shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo,
   return std::nullopt;
 }
 
+/// Return true if \p L contains a load or store whose pointer operand is not
+/// loop-invariant in \p L (per \p SE) and may be based on an alloca.
+///
+/// Underlying objects are collected with a lookup depth of 10; one AllocaInst
+/// among them is enough. Full-unrolling such loops can eliminate the alloca
+/// entirely once all constant-index accesses are visible to SROA.
+static bool hasLoopDependentArrayAccess(const Loop *L, ScalarEvolution &SE) {
+  for (BasicBlock *BB : L->blocks()) {
+    for (Instruction &I : *BB) {
+      // Null for anything that is not a load or a store.
+      Value *Ptr = getLoadStorePointerOperand(&I);
+      if (!Ptr)
+        continue;
+
+      SmallVector<const Value *, 4> Objects;
+      getUnderlyingObjects(Ptr, Objects, /*LI=*/nullptr, /*MaxLookup=*/10);
+      if (none_of(Objects, IsaPred<AllocaInst>))
+        continue;
+
+      // The disposition depends only on Ptr, not on which alloca matched, so
+      // query it once per access rather than once per underlying object.
+      if (SE.getLoopDisposition(SE.getSCEV(Ptr), L) !=
+          ScalarEvolution::LoopInvariant)
+        return true;
+    }
+  }
+  return false;
+}
+
 static std::optional<unsigned> shouldFullUnroll(
     Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT,
     ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
@@ -855,7 +885,10 @@ static std::optional<unsigned> shouldFullUnroll(
 
   // When computing the unrolled size, note that BEInsns are not replicated
   // like the rest of the loop body.
-  if (UCE.getUnrolledLoopSize(UP) < UP.Threshold)
+  unsigned Threshold = UP.Threshold;
+  if (hasLoopDependentArrayAccess(L, SE))
+    Threshold *= UP.LoopDependentMemoryAccessThresholdMultiplier;
+  if (UCE.getUnrolledLoopSize(UP) < Threshold)
     return FullUnrollTripCount;
 
   // The loop isn't that small, but we still can fully unroll it if that



More information about the llvm-commits mailing list