[llvm] cb32b8b - [LoopUnrollPass] Don't pre-set `UP.Count` before legality checks in `computeUnrollCount()` (#185979)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 31 12:51:11 PDT 2026
Author: Justin Fargnoli
Date: 2026-03-31T19:50:52Z
New Revision: cb32b8bffbf32fd7936808ad260764c28e3197db
URL: https://github.com/llvm/llvm-project/commit/cb32b8bffbf32fd7936808ad260764c28e3197db
DIFF: https://github.com/llvm/llvm-project/commit/cb32b8bffbf32fd7936808ad260764c28e3197db.diff
LOG: [LoopUnrollPass] Don't pre-set `UP.Count` before legality checks in `computeUnrollCount()` (#185979)
We currently set `UP.Count` to `TripCount` and `MaxTripCount` prior to
full and upper bound unrolling, respectively. This was likely done to
ensure that calls to `UCE.getUnrolledLoopSize(UP)` use the appropriate
trip count. However, we can use `UCE.getUnrolledLoopSize(UP,
FullUnrollTripCount)` instead.
To prevent unintentional unrolling, we set `UP.Count = 0` when
early-exiting `computeUnrollCount()`. (Note: this does not occur
[here](https://github.com/llvm/llvm-project/blob/eb687fb10613573b5d60dfbbb146e15f07809d82/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp#L1190-L1198).
This seems like a bug.)
We only perform early exits when evaluating runtime unrolling. At that
point, [we know `TripCount` is
false](https://github.com/justinfargnoli/llvm-project/blob/3fb31e7b06f6f6d08c7310506a2f363d198a6790/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp#L1157-L1158),
and thus we could not have leaked `TripCount`. However, we [could've
leaked
`MaxTripCount`](https://github.com/llvm/llvm-project/blob/eb687fb10613573b5d60dfbbb146e15f07809d82/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp#L1102-L1110).
It seems like:
https://github.com/llvm/llvm-project/blob/eb687fb10613573b5d60dfbbb146e15f07809d82/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp#L1181-L1188
was supposed to handle this case. However:
- It uses `<` instead of `<=`. This breaks the existing convention
[[1]](https://github.com/llvm/llvm-project/blob/eb687fb10613573b5d60dfbbb146e15f07809d82/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp#L869)
[[2]](https://github.com/llvm/llvm-project/blob/eb687fb10613573b5d60dfbbb146e15f07809d82/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp#L1103)
for how `UP.MaxUpperBound` is treated.
- It's ignored when a target sets `UP.Force = true`.
Thus:
- When `UP.Force == false`, we leak `MaxTripCount` into runtime
unrolling when `MaxTripCount && (UP.UpperBound || MaxOrZero) &&
MaxTripCount == UP.MaxUpperBound`
- When `UP.Force == true`, we leak `MaxTripCount` into runtime unrolling
when `MaxTripCount && (UP.UpperBound || MaxOrZero) && MaxTripCount <=
UP.MaxUpperBound`.
This PR:
- Uses `UCE.getUnrolledLoopSize(UP, FullUnrollTripCount)`
- Stops setting `TripCount` and `MaxTripCount` prior to calling
`shouldFullUnroll()`
- Removes the `UP.Count = 0` safeguards
- Swaps `<` with `<=`, to address the `UP.Force == false` case
- Adds a test to document the behavior change (no longer leaking
`MaxTripCount`) in the `UP.Force == true` case.
Added:
llvm/test/Transforms/LoopUnroll/RISCV/runtime-unroll-max-trip-count.ll
Modified:
llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
llvm/test/Transforms/LoopUnroll/debug.ll
llvm/test/Transforms/LoopUnroll/runtime-small-upperbound.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 9fdf7ef1b0a86..ce15427c43e4a 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -879,7 +879,7 @@ static std::optional<unsigned> shouldFullUnroll(
// When computing the unrolled size, note that BEInsns are not replicated
// like the rest of the loop body.
- uint64_t UnrolledSize = UCE.getUnrolledLoopSize(UP);
+ uint64_t UnrolledSize = UCE.getUnrolledLoopSize(UP, FullUnrollTripCount);
if (UnrolledSize < UP.Threshold) {
LLVM_DEBUG(dbgs().indent(2) << "Unrolling: size " << UnrolledSize
<< " < threshold " << UP.Threshold << ".\n");
@@ -1054,9 +1054,8 @@ void llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
// 3rd priority is exact full unrolling. This will eliminate all copies
// of some exit test.
LLVM_DEBUG(dbgs().indent(1) << "Trying full unroll...\n");
- UP.Count = 0;
+ assert(UP.Count == 0);
if (TripCount) {
- UP.Count = TripCount;
if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues,
TripCount, UCE, UP)) {
UP.Count = *UnrollFactor;
@@ -1079,7 +1078,6 @@ void llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
LLVM_DEBUG(dbgs().indent(1) << "Trying upper-bound unroll...\n");
if (!TripCount && MaxTripCount && (UP.UpperBound || MaxOrZero) &&
MaxTripCount <= UP.MaxUpperBound) {
- UP.Count = MaxTripCount;
if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues,
MaxTripCount, UCE, UP)) {
UP.Count = *UnrollFactor;
@@ -1152,16 +1150,14 @@ void llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
if (PInfo.PragmaRuntimeUnrollDisable) {
LLVM_DEBUG(dbgs().indent(2)
<< "Not runtime unrolling: disabled by pragma.\n");
- UP.Count = 0;
return;
}
// Don't unroll a small upper bound loop unless user or TTI asked to do so.
- if (MaxTripCount && !UP.Force && MaxTripCount < UP.MaxUpperBound) {
- LLVM_DEBUG(dbgs().indent(2)
- << "Not runtime unrolling: max trip count " << MaxTripCount
- << " is small (< " << UP.MaxUpperBound << ") and not forced.\n");
- UP.Count = 0;
+ if (MaxTripCount && !UP.Force && MaxTripCount <= UP.MaxUpperBound) {
+ LLVM_DEBUG(dbgs().indent(2) << "Not runtime unrolling: max trip count "
+ << MaxTripCount << " is small (<= "
+ << UP.MaxUpperBound << ") and not forced.\n");
return;
}
@@ -1174,17 +1170,18 @@ void llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
UP.AllowExpensiveTripCount = true;
}
}
+
UP.Runtime |= PInfo.PragmaEnableUnroll || PInfo.PragmaCount > 0 ||
PInfo.UserUnrollCount;
if (!UP.Runtime) {
LLVM_DEBUG(dbgs().indent(2)
<< "Will not try to unroll loop with runtime trip count "
<< "because -unroll-runtime not given\n");
- UP.Count = 0;
return;
}
- if (UP.Count == 0)
- UP.Count = UP.DefaultUnrollRuntimeCount;
+
+ assert(UP.Count == 0);
+ UP.Count = UP.DefaultUnrollRuntimeCount;
// Reduce unroll count to be the largest power-of-two factor of
// the original count which satisfies the threshold limit.
diff --git a/llvm/test/Transforms/LoopUnroll/RISCV/runtime-unroll-max-trip-count.ll b/llvm/test/Transforms/LoopUnroll/RISCV/runtime-unroll-max-trip-count.ll
new file mode 100644
index 0000000000000..8191a527dc222
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/RISCV/runtime-unroll-max-trip-count.ll
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -passes=loop-unroll -mattr=+no-default-unroll | FileCheck %s
+
+; Regression test: when a loop has a small MaxTripCount and falls through to
+; runtime unrolling, the unroll count should be based on DefaultUnrollRuntimeCount
+; (not MaxTripCount). Previously, a quirk would set UP.Count = MaxTripCount (5)
+; before the power-of-two reduction, yielding count 2 (5 >> 1). Now the count
+; starts at DefaultUnrollRuntimeCount (8) and reduces to 4 (8 >> 1).
+;
+; This test uses RISC-V because we need a target that sets UP.Force = true
+; (via RISCVTTIImpl::getUnrollingPreferences) to trigger the bug.
+
+target triple = "riscv64-unknown-linux-gnu"
+
+define void @test(i32 %start, ptr %p) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[END:%.*]] = add i32 [[START:%.*]], 5
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START]], [[ENTRY:%.*]] ], [ [[IV_NEXT_3:%.*]], [[LOOP_3:%.*]] ]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i32 [[IV]]
+; CHECK-NEXT: [[V:%.*]] = load volatile i32, ptr [[GEP]], align 4
+; CHECK-NEXT: store volatile i32 [[V]], ptr [[GEP]], align 4
+; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw i32 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IV_NEXT]], [[END]]
+; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_1:%.*]], label [[EXIT:%.*]]
+; CHECK: loop.1:
+; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, ptr [[P]], i32 [[IV_NEXT]]
+; CHECK-NEXT: [[V_1:%.*]] = load volatile i32, ptr [[GEP_1]], align 4
+; CHECK-NEXT: store volatile i32 [[V_1]], ptr [[GEP_1]], align 4
+; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw i32 [[IV]], 2
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp ult i32 [[IV_NEXT_1]], [[END]]
+; CHECK-NEXT: br i1 [[CMP_1]], label [[LOOP_2:%.*]], label [[EXIT]]
+; CHECK: loop.2:
+; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, ptr [[P]], i32 [[IV_NEXT_1]]
+; CHECK-NEXT: [[V_2:%.*]] = load volatile i32, ptr [[GEP_2]], align 4
+; CHECK-NEXT: store volatile i32 [[V_2]], ptr [[GEP_2]], align 4
+; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw i32 [[IV]], 3
+; CHECK-NEXT: [[CMP_2:%.*]] = icmp ult i32 [[IV_NEXT_2]], [[END]]
+; CHECK-NEXT: br i1 [[CMP_2]], label [[LOOP_3]], label [[EXIT]]
+; CHECK: loop.3:
+; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, ptr [[P]], i32 [[IV_NEXT_2]]
+; CHECK-NEXT: [[V_3:%.*]] = load volatile i32, ptr [[GEP_3]], align 4
+; CHECK-NEXT: store volatile i32 [[V_3]], ptr [[GEP_3]], align 4
+; CHECK-NEXT: [[IV_NEXT_3]] = add nuw i32 [[IV]], 4
+; CHECK-NEXT: [[CMP_3:%.*]] = icmp ult i32 [[IV_NEXT_3]], [[END]]
+; CHECK-NEXT: br i1 [[CMP_3]], label [[LOOP]], label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ %end = add i32 %start, 5
+ br label %loop
+
+loop:
+ %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ]
+ %gep = getelementptr inbounds i32, ptr %p, i32 %iv
+ %v = load volatile i32, ptr %gep
+ %b0 = call i32 @llvm.bswap.i32(i32 %v)
+ %b1 = call i32 @llvm.bswap.i32(i32 %b0)
+ %b2 = call i32 @llvm.bswap.i32(i32 %b1)
+ %b3 = call i32 @llvm.bswap.i32(i32 %b2)
+ %b4 = call i32 @llvm.bswap.i32(i32 %b3)
+ %b5 = call i32 @llvm.bswap.i32(i32 %b4)
+ %b6 = call i32 @llvm.bswap.i32(i32 %b5)
+ %b7 = call i32 @llvm.bswap.i32(i32 %b6)
+ %b8 = call i32 @llvm.bswap.i32(i32 %b7)
+ %b9 = call i32 @llvm.bswap.i32(i32 %b8)
+ %b10 = call i32 @llvm.bswap.i32(i32 %b9)
+ %b11 = call i32 @llvm.bswap.i32(i32 %b10)
+ %b12 = call i32 @llvm.bswap.i32(i32 %b11)
+ %b13 = call i32 @llvm.bswap.i32(i32 %b12)
+ %b14 = call i32 @llvm.bswap.i32(i32 %b13)
+ %b15 = call i32 @llvm.bswap.i32(i32 %b14)
+ %b16 = call i32 @llvm.bswap.i32(i32 %b15)
+ %b17 = call i32 @llvm.bswap.i32(i32 %b16)
+ %b18 = call i32 @llvm.bswap.i32(i32 %b17)
+ %b19 = call i32 @llvm.bswap.i32(i32 %b18)
+ %b20 = call i32 @llvm.bswap.i32(i32 %b19)
+ %b21 = call i32 @llvm.bswap.i32(i32 %b20)
+ %b22 = call i32 @llvm.bswap.i32(i32 %b21)
+ %b23 = call i32 @llvm.bswap.i32(i32 %b22)
+ %b24 = call i32 @llvm.bswap.i32(i32 %b23)
+ %b25 = call i32 @llvm.bswap.i32(i32 %b24)
+ store volatile i32 %b25, ptr %gep
+ %iv.next = add nuw i32 %iv, 1
+ %cmp = icmp ult i32 %iv.next, %end
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Transforms/LoopUnroll/debug.ll b/llvm/test/Transforms/LoopUnroll/debug.ll
index 6b611952c0935..fd84eeeb3d457 100644
--- a/llvm/test/Transforms/LoopUnroll/debug.ll
+++ b/llvm/test/Transforms/LoopUnroll/debug.ll
@@ -461,7 +461,7 @@ exit:
; CHECK-NEXT: Trying loop peeling...
; CHECK-NEXT: Trying partial unroll...
; CHECK-NEXT: Trying runtime unroll...
-; CHECK-NEXT: Not runtime unrolling: max trip count 3 is small (< 8) and not forced.
+; CHECK-NEXT: Not runtime unrolling: max trip count 3 is small (<= 8) and not forced.
; CHECK-NEXT: Not unrolling: no viable strategy found.
define i32 @runtime_small_max_tc(ptr %A, i32 %n) {
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-small-upperbound.ll b/llvm/test/Transforms/LoopUnroll/runtime-small-upperbound.ll
index 247cdd6c6be28..c56d4f267124c 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-small-upperbound.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-small-upperbound.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=loop-unroll -unroll-runtime %s -o - | FileCheck %s
-; RUN: opt -S -passes=loop-unroll -unroll-runtime -unroll-max-upperbound=6 %s -o - | FileCheck %s --check-prefix=UPPER
+; RUN: opt -S -passes=loop-unroll -unroll-runtime -unroll-max-upperbound=5 %s -o - | FileCheck %s --check-prefix=UPPER
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
@@ -70,7 +70,7 @@ exit:
ret void
}
-; Check that loop in hoge_5, with a runtime upperbound of 5, is unrolled when -unroll-max-upperbound=4
+; Check that loop in hoge_5, with a runtime upperbound of 5, is unrolled when -unroll-max-upperbound=5
define dso_local void @hoge_5(i8 %arg) {
; CHECK-LABEL: @hoge_5(
; CHECK-NEXT: entry:
More information about the llvm-commits
mailing list