[llvm] [LoopUnroll] Enable allowexpensivetripcounts when user provides pragma (PR #181267)
Adel Ejjeh via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 13 13:49:49 PDT 2026
https://github.com/adelejjeh updated https://github.com/llvm/llvm-project/pull/181267
>From 7eeebc4546c15153ba35973afa79efb3ef6326e1 Mon Sep 17 00:00:00 2001
From: Adel Ejjeh <adel.ejjeh at amd.com>
Date: Thu, 12 Feb 2026 14:29:59 -0600
Subject: [PATCH 1/3] Enable allowexpensivetripcounts when user provides pragma
---
llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 5 +
.../LoopUnroll/expensive-tripcount.ll | 101 ++++++++++++++++++
2 files changed, 106 insertions(+)
create mode 100644 llvm/test/Transforms/LoopUnroll/expensive-tripcount.ll
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index f99b9a4a3d043..fbc687b066354 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -1017,6 +1017,11 @@ bool llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
const bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
PragmaEnableUnroll || UserUnrollCount;
+ // If a user provided an explicit unroll pragma, it should override expensive
+ // trip count checks
+ if (ExplicitUnroll) {
+ UP.AllowExpensiveTripCount = true;
+ }
LLVM_DEBUG({
if (ExplicitUnroll) {
diff --git a/llvm/test/Transforms/LoopUnroll/expensive-tripcount.ll b/llvm/test/Transforms/LoopUnroll/expensive-tripcount.ll
new file mode 100644
index 0000000000000..4d9f39ccbb447
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/expensive-tripcount.ll
@@ -0,0 +1,101 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -S -passes=loop-unroll | FileCheck %s
+; Checks that a loop whose trip count is expensive to compute is still runtime-unrolled when the llvm.loop.unroll.enable metadata is present.
+
+%struct.ArgVec = type { [4 x float] }
+
+; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite)
+define dso_local void @complex_loop_unroll(i64 noundef %input_offset, i64 noundef %step, i64 noundef %n) {
+; CHECK-LABEL: define dso_local void @complex_loop_unroll(
+; CHECK-SAME: i64 noundef [[INPUT_OFFSET:%.*]], i64 noundef [[STEP:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[STEP]], [[INPUT_OFFSET]]
+; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 [[TMP0]])
+; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[SMAX]], [[TMP0]]
+; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 1)
+; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[SMAX]], [[UMIN]]
+; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], [[TMP0]]
+; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[STEP]], i64 1)
+; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 [[TMP3]], [[UMAX]]
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[UMIN]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 1
+; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP6]], 7
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP5]], 7
+; CHECK-NEXT: br i1 [[TMP7]], label %[[FOR_BODY_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
+; CHECK: [[ENTRY_NEW]]:
+; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TMP6]], [[XTRAITER]]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[TMP8:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[ADD_7:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INPUT_OFFSET]], %[[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ADD:%.*]] = add i64 [[TMP8]], [[INDVARS_IV]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nsw i64 [[INDVARS_IV]], [[STEP]]
+; CHECK-NEXT: [[ADD_1:%.*]] = add i64 [[ADD]], [[INDVARS_IV_NEXT]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nsw i64 [[INDVARS_IV_NEXT]], [[STEP]]
+; CHECK-NEXT: [[ADD_2:%.*]] = add i64 [[ADD_1]], [[INDVARS_IV_NEXT_1]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_1]], [[STEP]]
+; CHECK-NEXT: [[ADD_3:%.*]] = add i64 [[ADD_2]], [[INDVARS_IV_NEXT_2]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_2]], [[STEP]]
+; CHECK-NEXT: [[ADD_4:%.*]] = add i64 [[ADD_3]], [[INDVARS_IV_NEXT_3]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_3]], [[STEP]]
+; CHECK-NEXT: [[ADD_5:%.*]] = add i64 [[ADD_4]], [[INDVARS_IV_NEXT_4]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_4]], [[STEP]]
+; CHECK-NEXT: [[ADD_6:%.*]] = add i64 [[ADD_5]], [[INDVARS_IV_NEXT_5]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_5]], [[STEP]]
+; CHECK-NEXT: [[ADD_7]] = add i64 [[ADD_6]], [[INDVARS_IV_NEXT_6]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add nsw i64 [[INDVARS_IV_NEXT_6]], [[STEP]]
+; CHECK-NEXT: [[NITER_NEXT_7]] = add i64 [[NITER]], 8
+; CHECK-NEXT: [[NITER_NCMP_7:%.*]] = icmp ne i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
+; CHECK-NEXT: br i1 [[NITER_NCMP_7]], label %[[FOR_BODY]], label %[[FOR_END14_UNR_LCSSA:.*]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[FOR_END14_UNR_LCSSA]]:
+; CHECK-NEXT: [[DOTUNR:%.*]] = phi i64 [ [[ADD_7]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_7]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[FOR_BODY_EPIL_PREHEADER]], label %[[FOR_END14:.*]]
+; CHECK: [[FOR_BODY_EPIL_PREHEADER]]:
+; CHECK-NEXT: [[DOTEPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[DOTUNR]], %[[FOR_END14_UNR_LCSSA]] ]
+; CHECK-NEXT: [[INDVARS_IV_EPIL_INIT:%.*]] = phi i64 [ [[INPUT_OFFSET]], %[[ENTRY]] ], [ [[INDVARS_IV_UNR]], %[[FOR_END14_UNR_LCSSA]] ]
+; CHECK-NEXT: [[LCMP_MOD1:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT: call void @llvm.assume(i1 [[LCMP_MOD1]])
+; CHECK-NEXT: br label %[[FOR_BODY_EPIL:.*]]
+; CHECK: [[FOR_BODY_EPIL]]:
+; CHECK-NEXT: [[TMP9:%.*]] = phi i64 [ [[DOTEPIL_INIT]], %[[FOR_BODY_EPIL_PREHEADER]] ], [ [[ADD_EPIL:%.*]], %[[FOR_BODY_EPIL]] ]
+; CHECK-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_EPIL_INIT]], %[[FOR_BODY_EPIL_PREHEADER]] ], [ [[INDVARS_IV_NEXT_EPIL:%.*]], %[[FOR_BODY_EPIL]] ]
+; CHECK-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ 0, %[[FOR_BODY_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], %[[FOR_BODY_EPIL]] ]
+; CHECK-NEXT: [[ADD_EPIL]] = add i64 [[TMP9]], [[INDVARS_IV_EPIL]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_EPIL]] = add nsw i64 [[INDVARS_IV_EPIL]], [[STEP]]
+; CHECK-NEXT: [[CMP_EPIL:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT_EPIL]], [[N]]
+; CHECK-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
+; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
+; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label %[[FOR_BODY_EPIL]], label %[[FOR_END14_EPILOG_LCSSA:.*]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[FOR_END14_EPILOG_LCSSA]]:
+; CHECK-NEXT: br label %[[FOR_END14]]
+; CHECK: [[FOR_END14]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %3 = phi i64 [ 0, %entry ], [ %add, %for.body ]
+ %indvars.iv = phi i64 [ %input_offset, %entry ], [ %indvars.iv.next, %for.body ]
+ %add = add i64 %3, %indvars.iv
+ %indvars.iv.next = add nsw i64 %indvars.iv, %step
+ %cmp = icmp slt i64 %indvars.iv.next, %n
+ br i1 %cmp, label %for.body, label %for.end14, !llvm.loop !0
+
+for.end14: ; preds = %for.body
+ ret void
+}
+
+!0 = distinct !{!0, !1, !2}
+!1 = !{!"llvm.loop.mustprogress"}
+!2 = !{!"llvm.loop.unroll.enable"}
+
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.mustprogress"}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]]}
+;.
>From a093ef21d2e6755876b48ef20e8e808a46ae774e Mon Sep 17 00:00:00 2001
From: Adel Ejjeh <adel.ejjeh at amd.com>
Date: Wed, 25 Feb 2026 07:46:17 -0600
Subject: [PATCH 2/3] Refactor AllowExpensiveTripCount setting out of
`if(shouldPragmaUnroll())`
---
llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index fbc687b066354..d65511d0829ee 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -1017,11 +1017,6 @@ bool llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
const bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
PragmaEnableUnroll || UserUnrollCount;
- // If a user provided an explicit unroll pragma, it should override expensive
- // trip count checks
- if (ExplicitUnroll) {
- UP.AllowExpensiveTripCount = true;
- }
LLVM_DEBUG({
if (ExplicitUnroll) {
@@ -1053,6 +1048,12 @@ bool llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
UP.Runtime = false;
return true;
}
+ // If explicit unroll pragma is present, override expensive trip count checks.
+ // This applies to full unroll, partial unroll with a trip count, and partial
+ // unroll without a trip count.
+ if (ExplicitUnroll) {
+ UP.AllowExpensiveTripCount = true;
+ }
// Check for explicit Count.
// 1st priority is unroll count set by "unroll-count" option.
// 2nd priority is unroll count set by pragma.
@@ -1060,9 +1061,7 @@ bool llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
if (auto UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount,
MaxTripCount, UCE, UP)) {
UP.Count = *UnrollFactor;
-
if (UserUnrollCount || (PragmaCount > 0)) {
- UP.AllowExpensiveTripCount = true;
UP.Force = true;
}
UP.Runtime |= (PragmaCount > 0);
>From 86ecbea491f873cb7bbbe8edf6793da5d7b47787 Mon Sep 17 00:00:00 2001
From: Adel Ejjeh <adel.ejjeh at amd.com>
Date: Tue, 3 Mar 2026 12:27:23 -0600
Subject: [PATCH 3/3] Set AllowExpensiveTripCount and Runtime at the same time
---
llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index d65511d0829ee..1d90f136572cb 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -1048,11 +1048,11 @@ bool llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
UP.Runtime = false;
return true;
}
- // If explicit unroll pragma is present, override expensive trip count checks.
- // This applies to full unroll, partial unroll with a trip count, and partial
- // unroll without a trip count.
- if (ExplicitUnroll) {
+ // If a user provided an explicit unroll pragma (with or without count),
+ // enable runtime unrolling and override expensive trip count checks.
+ if (PragmaEnableUnroll || PragmaCount > 0) {
UP.AllowExpensiveTripCount = true;
+ UP.Runtime = true;
}
// Check for explicit Count.
// 1st priority is unroll count set by "unroll-count" option.
@@ -1062,9 +1062,9 @@ bool llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
MaxTripCount, UCE, UP)) {
UP.Count = *UnrollFactor;
if (UserUnrollCount || (PragmaCount > 0)) {
+ UP.AllowExpensiveTripCount = true;
UP.Force = true;
}
- UP.Runtime |= (PragmaCount > 0);
return ExplicitUnroll;
} else {
if (ExplicitUnroll && TripCount != 0) {
@@ -1200,7 +1200,6 @@ bool llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
UP.AllowExpensiveTripCount = true;
}
}
- UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
if (!UP.Runtime) {
LLVM_DEBUG(dbgs().indent(2)
<< "Will not try to unroll loop with runtime trip count "
More information about the llvm-commits
mailing list