[llvm] [LoopUnroll] Enable allowexpensivetripcounts when user provides pragma (PR #181267)
Adel Ejjeh via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 9 08:44:59 PDT 2026
https://github.com/adelejjeh updated https://github.com/llvm/llvm-project/pull/181267
>From ee64ece26a75c2d193a818f7c148f7fa71f413fb Mon Sep 17 00:00:00 2001
From: Adel Ejjeh <adel.ejjeh at amd.com>
Date: Thu, 12 Feb 2026 14:29:59 -0600
Subject: [PATCH 1/4] Enable allowexpensivetripcounts when user provides pragma
---
llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 5 +
.../LoopUnroll/expensive-tripcount.ll | 101 ++++++++++++++++++
2 files changed, 106 insertions(+)
create mode 100644 llvm/test/Transforms/LoopUnroll/expensive-tripcount.ll
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 4acd5aa27f61a..73f1b0a8e7fa5 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -953,6 +953,11 @@ bool llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
const bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
PragmaEnableUnroll || UserUnrollCount;
+ // If a user provided an explicit unroll pragma, it should override expensive
+ // trip count checks
+ if (ExplicitUnroll) {
+ UP.AllowExpensiveTripCount = true;
+ }
PragmaInfo PInfo(UserUnrollCount, PragmaFullUnroll, PragmaCount,
PragmaEnableUnroll);
diff --git a/llvm/test/Transforms/LoopUnroll/expensive-tripcount.ll b/llvm/test/Transforms/LoopUnroll/expensive-tripcount.ll
new file mode 100644
index 0000000000000..4d9f39ccbb447
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/expensive-tripcount.ll
@@ -0,0 +1,101 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -S -passes=loop-unroll | FileCheck %s
+; Checks that loops with expensive trip counts are unrolled when the loop.unroll.enable metadata is present.
+
+%struct.ArgVec = type { [4 x float] }
+
+; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite)
+define dso_local void @complex_loop_unroll(i64 noundef %input_offset, i64 noundef %step, i64 noundef %n) {
+; CHECK-LABEL: define dso_local void @complex_loop_unroll(
+; CHECK-SAME: i64 noundef [[INPUT_OFFSET:%.*]], i64 noundef [[STEP:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[STEP]], [[INPUT_OFFSET]]
+; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 [[TMP0]])
+; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[SMAX]], [[TMP0]]
+; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 1)
+; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[SMAX]], [[UMIN]]
+; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], [[TMP0]]
+; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[STEP]], i64 1)
+; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 [[TMP3]], [[UMAX]]
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[UMIN]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 1
+; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP6]], 7
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP5]], 7
+; CHECK-NEXT: br i1 [[TMP7]], label %[[FOR_BODY_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
+; CHECK: [[ENTRY_NEW]]:
+; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TMP6]], [[XTRAITER]]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[TMP8:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[ADD_7:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INPUT_OFFSET]], %[[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ADD:%.*]] = add i64 [[TMP8]], [[INDVARS_IV]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nsw i64 [[INDVARS_IV]], [[STEP]]
+; CHECK-NEXT: [[ADD_1:%.*]] = add i64 [[ADD]], [[INDVARS_IV_NEXT]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nsw i64 [[INDVARS_IV_NEXT]], [[STEP]]
+; CHECK-NEXT: [[ADD_2:%.*]] = add i64 [[ADD_1]], [[INDVARS_IV_NEXT_1]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_1]], [[STEP]]
+; CHECK-NEXT: [[ADD_3:%.*]] = add i64 [[ADD_2]], [[INDVARS_IV_NEXT_2]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_2]], [[STEP]]
+; CHECK-NEXT: [[ADD_4:%.*]] = add i64 [[ADD_3]], [[INDVARS_IV_NEXT_3]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_3]], [[STEP]]
+; CHECK-NEXT: [[ADD_5:%.*]] = add i64 [[ADD_4]], [[INDVARS_IV_NEXT_4]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_4]], [[STEP]]
+; CHECK-NEXT: [[ADD_6:%.*]] = add i64 [[ADD_5]], [[INDVARS_IV_NEXT_5]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_5]], [[STEP]]
+; CHECK-NEXT: [[ADD_7]] = add i64 [[ADD_6]], [[INDVARS_IV_NEXT_6]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add nsw i64 [[INDVARS_IV_NEXT_6]], [[STEP]]
+; CHECK-NEXT: [[NITER_NEXT_7]] = add i64 [[NITER]], 8
+; CHECK-NEXT: [[NITER_NCMP_7:%.*]] = icmp ne i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
+; CHECK-NEXT: br i1 [[NITER_NCMP_7]], label %[[FOR_BODY]], label %[[FOR_END14_UNR_LCSSA:.*]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[FOR_END14_UNR_LCSSA]]:
+; CHECK-NEXT: [[DOTUNR:%.*]] = phi i64 [ [[ADD_7]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_7]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[FOR_BODY_EPIL_PREHEADER]], label %[[FOR_END14:.*]]
+; CHECK: [[FOR_BODY_EPIL_PREHEADER]]:
+; CHECK-NEXT: [[DOTEPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[DOTUNR]], %[[FOR_END14_UNR_LCSSA]] ]
+; CHECK-NEXT: [[INDVARS_IV_EPIL_INIT:%.*]] = phi i64 [ [[INPUT_OFFSET]], %[[ENTRY]] ], [ [[INDVARS_IV_UNR]], %[[FOR_END14_UNR_LCSSA]] ]
+; CHECK-NEXT: [[LCMP_MOD1:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT: call void @llvm.assume(i1 [[LCMP_MOD1]])
+; CHECK-NEXT: br label %[[FOR_BODY_EPIL:.*]]
+; CHECK: [[FOR_BODY_EPIL]]:
+; CHECK-NEXT: [[TMP9:%.*]] = phi i64 [ [[DOTEPIL_INIT]], %[[FOR_BODY_EPIL_PREHEADER]] ], [ [[ADD_EPIL:%.*]], %[[FOR_BODY_EPIL]] ]
+; CHECK-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_EPIL_INIT]], %[[FOR_BODY_EPIL_PREHEADER]] ], [ [[INDVARS_IV_NEXT_EPIL:%.*]], %[[FOR_BODY_EPIL]] ]
+; CHECK-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ 0, %[[FOR_BODY_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], %[[FOR_BODY_EPIL]] ]
+; CHECK-NEXT: [[ADD_EPIL]] = add i64 [[TMP9]], [[INDVARS_IV_EPIL]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_EPIL]] = add nsw i64 [[INDVARS_IV_EPIL]], [[STEP]]
+; CHECK-NEXT: [[CMP_EPIL:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT_EPIL]], [[N]]
+; CHECK-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
+; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
+; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label %[[FOR_BODY_EPIL]], label %[[FOR_END14_EPILOG_LCSSA:.*]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[FOR_END14_EPILOG_LCSSA]]:
+; CHECK-NEXT: br label %[[FOR_END14]]
+; CHECK: [[FOR_END14]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %3 = phi i64 [ 0, %entry ], [ %add, %for.body ]
+ %indvars.iv = phi i64 [ %input_offset, %entry ], [ %indvars.iv.next, %for.body ]
+ %add = add i64 %3, %indvars.iv
+ %indvars.iv.next = add nsw i64 %indvars.iv, %step
+ %cmp = icmp slt i64 %indvars.iv.next, %n
+ br i1 %cmp, label %for.body, label %for.end14, !llvm.loop !0
+
+for.end14: ; preds = %for.body, %entry
+ ret void
+}
+
+!0 = distinct !{!0, !1, !2}
+!1 = !{!"llvm.loop.mustprogress"}
+!2 = !{!"llvm.loop.unroll.enable"}
+
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.mustprogress"}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]]}
+;.
>From ff0d1cb5be85ca7dd03a38af5bbb7dcc39a6ec4b Mon Sep 17 00:00:00 2001
From: Adel Ejjeh <adel.ejjeh at amd.com>
Date: Wed, 25 Feb 2026 07:46:17 -0600
Subject: [PATCH 2/4] Refactor AllowExpensiveTripCount setting out of
`if(shouldPragmaUnroll())`
---
llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 73f1b0a8e7fa5..68bca9b09c645 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -953,11 +953,6 @@ bool llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
const bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
PragmaEnableUnroll || UserUnrollCount;
- // If a user provided an explicit unroll pragma, it should override expensive
- // trip count checks
- if (ExplicitUnroll) {
- UP.AllowExpensiveTripCount = true;
- }
PragmaInfo PInfo(UserUnrollCount, PragmaFullUnroll, PragmaCount,
PragmaEnableUnroll);
@@ -972,15 +967,19 @@ bool llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
UP.Runtime = false;
return true;
}
+ // If explicit unroll pragma is present, override expensive trip count checks.
+ // This applies to full unroll, partial unroll with a trip count, and partial
+ // unroll without a trip count.
+ if (ExplicitUnroll) {
+ UP.AllowExpensiveTripCount = true;
+ }
// Check for explicit Count.
// 1st priority is unroll count set by "unroll-count" option.
// 2nd priority is unroll count set by pragma.
if (auto UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount,
MaxTripCount, UCE, UP)) {
UP.Count = *UnrollFactor;
-
if (UserUnrollCount || (PragmaCount > 0)) {
- UP.AllowExpensiveTripCount = true;
UP.Force = true;
}
UP.Runtime |= (PragmaCount > 0);
>From 431200b086bc58c41476732d24d472eb76f78d37 Mon Sep 17 00:00:00 2001
From: Adel Ejjeh <adel.ejjeh at amd.com>
Date: Tue, 3 Mar 2026 12:27:23 -0600
Subject: [PATCH 3/4] Set AllowExpensiveTripCount and Runtime at the same time
---
llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 68bca9b09c645..65ac1a634d446 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -967,11 +967,11 @@ bool llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
UP.Runtime = false;
return true;
}
- // If explicit unroll pragma is present, override expensive trip count checks.
- // This applies to full unroll, partial unroll with a trip count, and partial
- // unroll without a trip count.
- if (ExplicitUnroll) {
+ // If a user provided an explicit unroll pragma (with or without count),
+ // enable runtime unrolling and override expensive trip count checks.
+ if (PragmaEnableUnroll || PragmaCount > 0) {
UP.AllowExpensiveTripCount = true;
+ UP.Runtime = true;
}
// Check for explicit Count.
// 1st priority is unroll count set by "unroll-count" option.
@@ -980,9 +980,9 @@ bool llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
MaxTripCount, UCE, UP)) {
UP.Count = *UnrollFactor;
if (UserUnrollCount || (PragmaCount > 0)) {
+ UP.AllowExpensiveTripCount = true;
UP.Force = true;
}
- UP.Runtime |= (PragmaCount > 0);
return ExplicitUnroll;
} else {
if (ExplicitUnroll && TripCount != 0) {
@@ -1106,7 +1106,6 @@ bool llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
UP.AllowExpensiveTripCount = true;
}
}
- UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
if (!UP.Runtime) {
LLVM_DEBUG(dbgs().indent(2)
<< "Will not try to unroll loop with runtime trip count "
>From 4b39e8d074640ed219fd2308df59dd9a1b683ecc Mon Sep 17 00:00:00 2001
From: Adel Ejjeh <adel.ejjeh at amd.com>
Date: Mon, 9 Mar 2026 10:29:00 -0500
Subject: [PATCH 4/4] [LoopUnroll] Set AllowExpensiveTripCount for
pragma-unrolled loops before TTI
Move the setting of UP.AllowExpensiveTripCount and UP.Runtime for
pragma-annotated loops from computeUnrollCount() into
gatherUnrollingPreferences(), before the TTI call. This allows targets
to opt out of expensive trip count computation by overriding
AllowExpensiveTripCount in their getUnrollingPreferences().
All non-AMDGPU targets explicitly opt out to preserve existing behavior.
Target maintainers can remove the opt-out to enable the new behavior for
their target. AMDGPU benefits from this change as it improves unrolling
of loops with runtime trip counts when an explicit unroll pragma is
present.
The condition for the early setting is PragmaEnableUnroll || PragmaCount > 0,
excluding PragmaFullUnroll (which does not need runtime unrolling) and
UserUnrollCount (which is handled separately in shouldPragmaUnroll()).
Additionally, pass OnlyFullUnroll to computeUnrollCount() so
LoopFullUnrollPass can bail out before attempting partial/runtime unrolling.
---
.../llvm/Transforms/Utils/UnrollLoop.h | 3 +-
.../AArch64/AArch64TargetTransformInfo.cpp | 4 +
.../lib/Target/ARM/ARMTargetTransformInfo.cpp | 4 +
.../lib/Target/AVR/AVRTargetTransformInfo.cpp | 9 ++
llvm/lib/Target/AVR/AVRTargetTransformInfo.h | 4 +
.../DirectX/DirectXTargetTransformInfo.cpp | 9 ++
.../DirectX/DirectXTargetTransformInfo.h | 4 +
.../Hexagon/HexagonTargetTransformInfo.cpp | 3 +
.../LoongArchTargetTransformInfo.cpp | 9 ++
.../LoongArch/LoongArchTargetTransformInfo.h | 4 +
.../Target/Mips/MipsTargetTransformInfo.cpp | 9 ++
.../lib/Target/Mips/MipsTargetTransformInfo.h | 4 +
.../Target/NVPTX/NVPTXTargetTransformInfo.cpp | 4 +
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 4 +
.../Target/SPIRV/SPIRVTargetTransformInfo.cpp | 9 ++
.../Target/SPIRV/SPIRVTargetTransformInfo.h | 4 +
.../Target/Sparc/SparcTargetTransformInfo.cpp | 9 ++
.../Target/Sparc/SparcTargetTransformInfo.h | 4 +
.../WebAssemblyTargetTransformInfo.cpp | 4 +
.../lib/Target/X86/X86TargetTransformInfo.cpp | 9 ++
llvm/lib/Target/X86/X86TargetTransformInfo.h | 4 +
llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 108 ++++++++++--------
.../LoopUnroll/full-unroll-avoid-partial.ll | 1 -
23 files changed, 176 insertions(+), 50 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
index c6c691e683b22..0c93400c5282a 100644
--- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -165,7 +165,8 @@ computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT,
unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple,
const UnrollCostEstimator &UCE,
TargetTransformInfo::UnrollingPreferences &UP,
- TargetTransformInfo::PeelingPreferences &PP);
+ TargetTransformInfo::PeelingPreferences &PP,
+ const bool OnlyFullUnroll = false);
LLVM_ABI std::optional<RecurrenceDescriptor>
canParallelizeReductionWhenUnrolling(PHINode &Phi, Loop *L,
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 40905d86ff3e0..30d0d1770f7a1 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -5371,6 +5371,10 @@ void AArch64TTIImpl::getUnrollingPreferences(
// Enable partial unrolling and runtime unrolling.
BaseT::getUnrollingPreferences(L, SE, UP, ORE);
+ // Opt out of allowing expensive trip count computations for pragma-unrolled
+ // loops. Target maintainers can remove this to opt in.
+ UP.AllowExpensiveTripCount = false;
+
UP.UpperBound = true;
// For inner loop, it is more likely to be a hot one, and the runtime check
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index bcf4d6286b4d3..5067a13a3f72a 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -2683,6 +2683,10 @@ TailFoldingStyle ARMTTIImpl::getPreferredTailFoldingStyle() const {
void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP,
OptimizationRemarkEmitter *ORE) const {
+ // Opt out of allowing expensive trip count computations for pragma-unrolled
+ // loops. Target maintainers can remove this to opt in.
+ UP.AllowExpensiveTripCount = false;
+
// Enable Upper bound unrolling universally, providing that we do not see an
// active lane mask, which will be better kept as a loop to become tail
// predicated than to be conditionally unrolled.
diff --git a/llvm/lib/Target/AVR/AVRTargetTransformInfo.cpp b/llvm/lib/Target/AVR/AVRTargetTransformInfo.cpp
index b1ef38047c07b..63a862b594ea6 100644
--- a/llvm/lib/Target/AVR/AVRTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AVR/AVRTargetTransformInfo.cpp
@@ -22,3 +22,12 @@ bool AVRTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
std::tie(C2.Insns, C2NumRegs, C2.AddRecCost, C2.NumIVMuls,
C2.NumBaseAdds, C2.ScaleCost, C2.ImmCost, C2.SetupCost);
}
+
+void AVRTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) const {
+ BaseT::getUnrollingPreferences(L, SE, UP, ORE);
+ // Opt out of allowing expensive trip count computations for pragma-unrolled
+ // loops. Target maintainers can remove this to opt in.
+ UP.AllowExpensiveTripCount = false;
+}
diff --git a/llvm/lib/Target/AVR/AVRTargetTransformInfo.h b/llvm/lib/Target/AVR/AVRTargetTransformInfo.h
index 338a7c8082ca3..f9747136dbed4 100644
--- a/llvm/lib/Target/AVR/AVRTargetTransformInfo.h
+++ b/llvm/lib/Target/AVR/AVRTargetTransformInfo.h
@@ -43,6 +43,10 @@ class AVRTTIImpl final : public BasicTTIImplBase<AVRTTIImpl> {
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
const TargetTransformInfo::LSRCost &C2) const override;
+
+ void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) const override;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
index a2d7ffefbb5a2..bd13f2ef75f0a 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
@@ -83,3 +83,12 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
return false;
}
}
+
+void DirectXTTIImpl::getUnrollingPreferences(
+ Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) const {
+ BaseT::getUnrollingPreferences(L, SE, UP, ORE);
+ // Opt out of allowing expensive trip count computations for pragma-unrolled
+ // loops. Target maintainers can remove this to opt in.
+ UP.AllowExpensiveTripCount = false;
+}
diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.h b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.h
index e2dd4354a8167..0e1776d2378e3 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.h
+++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.h
@@ -39,6 +39,10 @@ class DirectXTTIImpl final : public BasicTTIImplBase<DirectXTTIImpl> {
unsigned ScalarOpdIdx) const override;
bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
int OpdIdx) const override;
+
+ void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) const override;
};
} // namespace llvm
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index c2e9b3527e1ee..eacf55d503e0b 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -85,6 +85,9 @@ HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const {
void HexagonTTIImpl::getUnrollingPreferences(
Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP,
OptimizationRemarkEmitter *ORE) const {
+ // Opt out of allowing expensive trip count computations for pragma-unrolled
+ // loops. Target maintainers can remove this to opt in.
+ UP.AllowExpensiveTripCount = false;
UP.Runtime = UP.Partial = true;
}
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
index 5107c8def3799..f9ecbeaf523bc 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
@@ -133,3 +133,12 @@ LoongArchTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
return Options;
}
+
+void LoongArchTTIImpl::getUnrollingPreferences(
+ Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) const {
+ BaseT::getUnrollingPreferences(L, SE, UP, ORE);
+ // Opt out of allowing expensive trip count computations for pragma-unrolled
+ // loops. Target maintainers can remove this to opt in.
+ UP.AllowExpensiveTripCount = false;
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
index 9b479f9dc0dc5..a7cb368f9c34f 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
@@ -57,6 +57,10 @@ class LoongArchTTIImpl : public BasicTTIImplBase<LoongArchTTIImpl> {
TTI::MemCmpExpansionOptions
enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override;
+
+ void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) const override;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/Mips/MipsTargetTransformInfo.cpp b/llvm/lib/Target/Mips/MipsTargetTransformInfo.cpp
index e861e43c56c75..56c28fcc5e12d 100644
--- a/llvm/lib/Target/Mips/MipsTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsTargetTransformInfo.cpp
@@ -28,3 +28,12 @@ bool MipsTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
std::tie(C2.Insns, C2NumRegs, C2.AddRecCost, C2.NumIVMuls,
C2.NumBaseAdds, C2.ScaleCost, C2.ImmCost, C2.SetupCost);
}
+
+void MipsTTIImpl::getUnrollingPreferences(
+ Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) const {
+ BaseT::getUnrollingPreferences(L, SE, UP, ORE);
+ // Opt out of allowing expensive trip count computations for pragma-unrolled
+ // loops. Target maintainers can remove this to opt in.
+ UP.AllowExpensiveTripCount = false;
+}
diff --git a/llvm/lib/Target/Mips/MipsTargetTransformInfo.h b/llvm/lib/Target/Mips/MipsTargetTransformInfo.h
index 8f8173915b2fb..31ce1fba90b0c 100644
--- a/llvm/lib/Target/Mips/MipsTargetTransformInfo.h
+++ b/llvm/lib/Target/Mips/MipsTargetTransformInfo.h
@@ -36,6 +36,10 @@ class MipsTTIImpl final : public BasicTTIImplBase<MipsTTIImpl> {
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
const TargetTransformInfo::LSRCost &C2) const override;
+
+ void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) const override;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index c1fe9300785a3..05c9b87c97103 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -531,6 +531,10 @@ void NVPTXTTIImpl::getUnrollingPreferences(
OptimizationRemarkEmitter *ORE) const {
BaseT::getUnrollingPreferences(L, SE, UP, ORE);
+ // Opt out of allowing expensive trip count computations for pragma-unrolled
+ // loops. Target maintainers can remove this to opt in.
+ UP.AllowExpensiveTripCount = false;
+
// Enable partial unrolling and runtime unrolling, but reduce the
// threshold. This partially unrolls small loops which are often
// unrolled by the PTX to SASS compiler and unrolling earlier can be
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 5394d5dc0be11..58ca4f33e19db 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2851,6 +2851,10 @@ InstructionCost RISCVTTIImpl::getPointersChainCost(
void RISCVTTIImpl::getUnrollingPreferences(
Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP,
OptimizationRemarkEmitter *ORE) const {
+ // Opt out of allowing expensive trip count computations for pragma-unrolled
+ // loops. Target maintainers can remove this to opt in.
+ UP.AllowExpensiveTripCount = false;
+
// TODO: More tuning on benchmarks and metrics with changes as needed
// would apply to all settings below to enable performance.
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.cpp
index 95093d2b3c263..7e2ade84f2a48 100644
--- a/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.cpp
@@ -38,3 +38,12 @@ Value *llvm::SPIRVTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
return nullptr;
}
}
+
+void llvm::SPIRVTTIImpl::getUnrollingPreferences(
+ Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) const {
+ BaseT::getUnrollingPreferences(L, SE, UP, ORE);
+ // Opt out of allowing expensive trip count computations for pragma-unrolled
+ // loops. Target maintainers can remove this to opt in.
+ UP.AllowExpensiveTripCount = false;
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
index 60c4e2de2fb23..1421b26edfcaf 100644
--- a/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
@@ -61,6 +61,10 @@ class SPIRVTTIImpl final : public BasicTTIImplBase<SPIRVTTIImpl> {
Value *NewV) const override;
bool allowVectorElementIndexingUsingGEP() const override { return false; }
+
+ void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) const override;
};
} // namespace llvm
diff --git a/llvm/lib/Target/Sparc/SparcTargetTransformInfo.cpp b/llvm/lib/Target/Sparc/SparcTargetTransformInfo.cpp
index cd8167eb742b7..7b24170f4206f 100644
--- a/llvm/lib/Target/Sparc/SparcTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Sparc/SparcTargetTransformInfo.cpp
@@ -20,3 +20,12 @@ SparcTTIImpl::getPopcntSupport(unsigned TyWidth) const {
return TTI::PSK_FastHardware;
return TTI::PSK_Software;
}
+
+void SparcTTIImpl::getUnrollingPreferences(
+ Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) const {
+ BaseT::getUnrollingPreferences(L, SE, UP, ORE);
+ // Opt out of allowing expensive trip count computations for pragma-unrolled
+ // loops. Target maintainers can remove this to opt in.
+ UP.AllowExpensiveTripCount = false;
+}
diff --git a/llvm/lib/Target/Sparc/SparcTargetTransformInfo.h b/llvm/lib/Target/Sparc/SparcTargetTransformInfo.h
index 3bf9a197ae1d5..0720d4ad2f856 100644
--- a/llvm/lib/Target/Sparc/SparcTargetTransformInfo.h
+++ b/llvm/lib/Target/Sparc/SparcTargetTransformInfo.h
@@ -42,6 +42,10 @@ class SparcTTIImpl final : public BasicTTIImplBase<SparcTTIImpl> {
/// @{
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
+
+ void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) const override;
/// @}
};
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
index f058d734bcb87..0ebc093082821 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
@@ -468,6 +468,10 @@ TTI::ReductionShuffle WebAssemblyTTIImpl::getPreferredExpandedReductionShuffle(
void WebAssemblyTTIImpl::getUnrollingPreferences(
Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP,
OptimizationRemarkEmitter *ORE) const {
+ // Opt out of allowing expensive trip count computations for pragma-unrolled
+ // loops. Target maintainers can remove this to opt in.
+ UP.AllowExpensiveTripCount = false;
+
// Scan the loop: don't unroll loops with calls. This is a standard approach
// for most (all?) targets.
for (BasicBlock *BB : L->blocks())
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 09ece99e1e976..93948ecbdf7ff 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -7327,3 +7327,12 @@ bool X86TTIImpl::useFastCCForInternalCall(Function &F) const {
return true;
}
+
+void X86TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) const {
+ BaseT::getUnrollingPreferences(L, SE, UP, ORE);
+ // Opt out of allowing expensive trip count computations for pragma-unrolled
+ // loops. Target maintainers can remove this to opt in.
+ UP.AllowExpensiveTripCount = false;
+}
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index b3dde1555d0a0..d75dc98372bbd 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -327,6 +327,10 @@ class X86TTIImpl final : public BasicTTIImplBase<X86TTIImpl> {
bool useFastCCForInternalCall(Function &F) const override;
+ void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) const override;
+
private:
bool supportsGather() const;
InstructionCost getGSVectorCost(unsigned Opcode, TTI::TargetCostKind CostKind,
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 65ac1a634d446..6781c404f95b3 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -183,6 +183,46 @@ static cl::opt<unsigned> PragmaUnrollFullMaxIterations(
/// code expansion would result.
static const unsigned NoThreshold = std::numeric_limits<unsigned>::max();
+// Returns the loop hint metadata node with the given name (for example,
+// "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
+// returned.
+static MDNode *getUnrollMetadataForLoop(const Loop *L, StringRef Name) {
+ if (MDNode *LoopID = L->getLoopID())
+ return GetUnrollMetadata(LoopID, Name);
+ return nullptr;
+}
+
+// Returns true if the loop has an unroll(full) pragma.
+static bool hasUnrollFullPragma(const Loop *L) {
+ return getUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
+}
+
+// Returns true if the loop has an unroll(enable) pragma. This metadata is used
+// for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives.
+static bool hasUnrollEnablePragma(const Loop *L) {
+ return getUnrollMetadataForLoop(L, "llvm.loop.unroll.enable");
+}
+
+// Returns true if the loop has a runtime unroll(disable) pragma.
+static bool hasRuntimeUnrollDisablePragma(const Loop *L) {
+ return getUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
+}
+
+// If the loop has an unroll_count pragma, return the (necessarily
+// positive) value from the pragma. Otherwise return 0.
+static unsigned unrollCountPragmaValue(const Loop *L) {
+ MDNode *MD = getUnrollMetadataForLoop(L, "llvm.loop.unroll.count");
+ if (MD) {
+ assert(MD->getNumOperands() == 2 &&
+ "Unroll count hint metadata should have two operands.");
+ unsigned Count =
+ mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
+ assert(Count >= 1 && "Unroll count must be positive.");
+ return Count;
+ }
+ return 0;
+}
+
/// Gather the various unrolling parameters based on the defaults, compiler
/// flags, TTI overrides and user specified parameters.
TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
@@ -222,6 +262,16 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.RuntimeUnrollMultiExit = false;
UP.AddAdditionalAccumulators = false;
+ const unsigned PragmaCount = unrollCountPragmaValue(L);
+ const bool PragmaEnableUnroll = hasUnrollEnablePragma(L);
+ // If a user provided an explicit unroll pragma (with or without count),
+ // enable runtime unrolling and override expensive trip count checks by
+ // default. Target-specific opt-out can be achieved by TTI.
+ if (PragmaEnableUnroll || PragmaCount > 0) {
+ UP.AllowExpensiveTripCount = true;
+ UP.Runtime = true;
+ }
+
// Override with any target specific settings
TTI.getUnrollingPreferences(L, SE, UP, &ORE);
@@ -746,46 +796,6 @@ uint64_t UnrollCostEstimator::getUnrolledLoopSize(
return static_cast<uint64_t>(LS - UP.BEInsns) * UP.Count + UP.BEInsns;
}
-// Returns the loop hint metadata node with the given name (for example,
-// "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
-// returned.
-static MDNode *getUnrollMetadataForLoop(const Loop *L, StringRef Name) {
- if (MDNode *LoopID = L->getLoopID())
- return GetUnrollMetadata(LoopID, Name);
- return nullptr;
-}
-
-// Returns true if the loop has an unroll(full) pragma.
-static bool hasUnrollFullPragma(const Loop *L) {
- return getUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
-}
-
-// Returns true if the loop has an unroll(enable) pragma. This metadata is used
-// for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives.
-static bool hasUnrollEnablePragma(const Loop *L) {
- return getUnrollMetadataForLoop(L, "llvm.loop.unroll.enable");
-}
-
-// Returns true if the loop has an runtime unroll(disable) pragma.
-static bool hasRuntimeUnrollDisablePragma(const Loop *L) {
- return getUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
-}
-
-// If loop has an unroll_count pragma return the (necessarily
-// positive) value from the pragma. Otherwise return 0.
-static unsigned unrollCountPragmaValue(const Loop *L) {
- MDNode *MD = getUnrollMetadataForLoop(L, "llvm.loop.unroll.count");
- if (MD) {
- assert(MD->getNumOperands() == 2 &&
- "Unroll count hint metadata should have two operands.");
- unsigned Count =
- mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
- assert(Count >= 1 && "Unroll count must be positive.");
- return Count;
- }
- return 0;
-}
-
// Computes the boosting factor for complete unrolling.
// If fully unrolling the loop would save a lot of RolledDynamicCost, it would
// be beneficial to fully unroll the loop even if unrolledcost is large. We
@@ -942,7 +952,8 @@ bool llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
const unsigned TripMultiple,
const UnrollCostEstimator &UCE,
TargetTransformInfo::UnrollingPreferences &UP,
- TargetTransformInfo::PeelingPreferences &PP) {
+ TargetTransformInfo::PeelingPreferences &PP,
+ const bool OnlyFullUnroll) {
unsigned LoopSize = UCE.getRolledLoopSize();
@@ -967,12 +978,6 @@ bool llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
UP.Runtime = false;
return true;
}
- // If a user provided an explicit unroll pragma (with or without count),
- // enable runtime unrolling and override expensive trip count checks.
- if (PragmaEnableUnroll || PragmaCount > 0) {
- UP.AllowExpensiveTripCount = true;
- UP.Runtime = true;
- }
// Check for explicit Count.
// 1st priority is unroll count set by "unroll-count" option.
// 2nd priority is unroll count set by pragma.
@@ -1084,6 +1089,12 @@ bool llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
"trip count";
});
+ if (OnlyFullUnroll) {
+ LLVM_DEBUG(dbgs().indent(2)
+ << "Not attempting partial/runtime unroll in FullLoopUnroll.\n");
+ UP.Count = 0;
+ return false;
+ }
// 7th priority is runtime unrolling.
// Don't unroll a runtime trip count loop when it is disabled.
if (hasRuntimeUnrollDisablePragma(L)) {
@@ -1307,7 +1318,8 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
// fully unroll the loop.
bool IsCountSetExplicitly =
computeUnrollCount(L, TTI, DT, LI, &AC, SE, EphValues, &ORE, TripCount,
- MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP);
+ MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP,
+ OnlyFullUnroll);
if (!UP.Count)
return LoopUnrollResult::Unmodified;
diff --git a/llvm/test/Transforms/LoopUnroll/full-unroll-avoid-partial.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-avoid-partial.ll
index 961c12538cae8..a7725533e7549 100644
--- a/llvm/test/Transforms/LoopUnroll/full-unroll-avoid-partial.ll
+++ b/llvm/test/Transforms/LoopUnroll/full-unroll-avoid-partial.ll
@@ -18,7 +18,6 @@
; LOOP-UNROLL-FULL-LABEL: Loop Unroll: F[pragma_unroll] Loop %for.body
; LOOP-UNROLL-FULL-NEXT: Loop Size = 9
-; LOOP-UNROLL-FULL-NEXT: Runtime unrolling with count: 8
; LOOP-UNROLL-FULL-NEXT: Not attempting partial/runtime unroll in FullLoopUnroll
define void @pragma_unroll(ptr %queue, i32 %num_elements) {
entry:
More information about the llvm-commits
mailing list