[llvm] [AMDGPU][LLVM] Improve unrolling for user-requested loop unrolling via pragma directive (PR #140320)

Fri May 16 17:05:56 PDT 2025

https://github.com/doru1004 created https://github.com/llvm/llvm-project/pull/140320

In certain cases where the user requests loop unrolling via `pragma unroll`, the unroll pass decides that the transformation is not profitable because the cost model is too conservative. This patch relaxes the unrolling thresholds when `pragma unroll` is used.

>From b34e885d700ee91edb757128bc3a96200171d996 Mon Sep 17 00:00:00 2001
From: Doru Bercea <doru.bercea at amd.com>
Date: Fri, 16 May 2025 16:52:36 -0500
Subject: [PATCH] Improve unrolling for user-requested loop unrolling via
 pragma directive

---
 llvm/include/llvm/Analysis/TargetTransformInfo.h     |  2 ++
 llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp |  3 +++
 llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp        | 10 ++++++++--
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 4e2d37be3a2b2..305a5181ce3cd 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -633,6 +633,8 @@ class TargetTransformInfo {
     /// Fall back to the generic logic to determine whether multi-exit unrolling
     /// is profitable if set to false.
     bool RuntimeUnrollMultiExit;
+    /// Relax conditions for unrolling when the user requests unrolling via pragma.
+    bool RelaxPragmaUnrollThresholds;
   };
 
   /// Get target-customized preferences for the generic loop unrolling
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index c26726c445401..b135c58e52550 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -116,6 +116,9 @@ void AMDGPUTTIImpl::getUnrollingPreferences(
   UP.MaxCount = std::numeric_limits<unsigned>::max();
   UP.Partial = true;
 
+  // Relax conditions for unrolling when user requests unrolling via pragma.
+  UP.RelaxPragmaUnrollThresholds = true;
+
   // Conditional branch in a loop back edge needs 3 additional exec
   // manipulations in average.
   UP.BEInsns += 3;
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index d84b74dd0eecc..030fe54091ba4 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -221,6 +221,7 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
   UP.MaxIterationsCountToAnalyze = UnrollMaxIterationsCountToAnalyze;
   UP.SCEVExpansionBudget = SCEVCheapExpansionBudget;
   UP.RuntimeUnrollMultiExit = false;
+  UP.RelaxPragmaUnrollThresholds = false;
 
   // Override with any target specific settings
   TTI.getUnrollingPreferences(L, SE, UP, &ORE);
@@ -939,6 +940,10 @@ bool llvm::computeUnrollCount(
 
   const bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
                               PragmaEnableUnroll || UserUnrollCount;
+  // If enabled, relax unrolling thresholds when pragma unroll is used.
+  const bool RelaxUnrollThrehsholds = UP.RelaxPragmaUnrollThresholds &&
+                                      (PragmaEnableUnroll && !UserUnrollCount &&
+                                       !PragmaFullUnroll && PragmaCount == 0);
 
   PragmaInfo PInfo(UserUnrollCount, PragmaFullUnroll, PragmaCount,
                    PragmaEnableUnroll);
@@ -967,7 +972,7 @@ bool llvm::computeUnrollCount(
     UP.Runtime |= (PragmaCount > 0);
     return ExplicitUnroll;
   } else {
-    if (ExplicitUnroll && TripCount != 0) {
+    if (RelaxUnrollThrehsholds || (ExplicitUnroll && TripCount != 0)) {
       // If the loop has an unrolling pragma, we want to be more aggressive with
       // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
       // value which is larger than the default limits.
@@ -1077,7 +1082,8 @@ bool llvm::computeUnrollCount(
   }
 
   // Don't unroll a small upper bound loop unless user or TTI asked to do so.
-  if (MaxTripCount && !UP.Force && MaxTripCount < UP.MaxUpperBound) {
+  if (!RelaxUnrollThrehsholds && MaxTripCount && !UP.Force &&
+      MaxTripCount < UP.MaxUpperBound) {
     UP.Count = 0;
     return false;
   }