[llvm] [LoopPeel] Avoid unprofitable last iteration peeling with equalitis (PR #143588)
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 10 12:18:53 PDT 2025
https://github.com/preames created https://github.com/llvm/llvm-project/pull/143588
The peeling profitability logic considers two types of conditions: monotonic relational and equalities. Monotonic conditions can only toggle from true to false (or vice versa) once, but equalities (even with no wrap) can toggle twice on successive iterations.
The forward peeling code has a special case for this where we will advance the peel count by one to ensure we get both transitions, but we didn't consider this in the recently added reverse peeling code. This isn't unsound, but it does result in us peeling a loop without being able to actully eliminate the condition in the loop.
This patch guards against identifying the second transition. An alternate approach would be to extend the reverse peeling to allow more than one iteration, and peel off two iterations.
>From 4a6cdbf2b42f8b7b4ea62f0bd3e3720d71f806ca Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Tue, 10 Jun 2025 11:58:16 -0700
Subject: [PATCH] [LoopPeel] Avoid unprofitable last iteration peeling with
equalitis
The peeling profitability logic considers two types of conditions:
monotonic relational and equalities. Monotonic conditions can
only toggle from true to false (or vice versa) once, but equalities
(even with no wrap) can toggle twice on successive iterations.
The forward peeling code has a special case for this where we
will advance the peel count by one to ensure we get both transitions,
but we didn't consider this in the recently added reverse peeling
code. This isn't unsound, but it does result in us peeling a loop
without being able to actully eliminate the condition in the loop.
This patch guards against identifying the second transition. An
alternate approach would be to extend the reverse peeling to
allow more than one iteration, and peel off two iterations.
---
llvm/lib/Transforms/Utils/LoopPeel.cpp | 20 +++++++++--
...last-iteration-with-variable-trip-count.ll | 36 ++++---------------
2 files changed, 24 insertions(+), 32 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index f343962548259..7e56c09dcc87d 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -381,9 +381,23 @@ static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred,
const SCEV *ValAtSecondToLastIter = LeftAR->evaluateAtIteration(
SE.getMinusSCEV(BTC, SE.getOne(BTC->getType())), SE);
- return SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), ValAtLastIter,
- RightSCEV) &&
- SE.isKnownPredicate(Pred, ValAtSecondToLastIter, RightSCEV);
+ CmpPredicate InvPred = ICmpInst::getInversePredicate(Pred);
+ if (!SE.isKnownPredicate(InvPred, ValAtLastIter, RightSCEV) ||
+ !SE.isKnownPredicate(Pred, ValAtSecondToLastIter, RightSCEV))
+ return false;
+
+ // For a monotonic predicate, we've found the single transition point and
+ // don't need to check further.
+ if (!ICmpInst::isEquality(Pred))
+ return true;
+
+ // For an equality, the condition can toggle twice. For the peeling to be
+ // profitable, we need to know that this is the first toggle. The two
+ // toggles are separate by one iteration. If the BTC isn't at least two
+ // this will overflow, but that's okay correctness wise.
+ const SCEV *ValAtPriorIter = LeftAR->evaluateAtIteration(
+ SE.getMinusSCEV(BTC, SE.getConstant(BTC->getType(), 2)), SE);
+ return SE.isKnownPredicate(Pred, ValAtPriorIter, RightSCEV);
}
// Return the number of iterations to peel off from the beginning and end of the
diff --git a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll
index 5704be33f0f0b..34b53c54d9bc6 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll
@@ -176,39 +176,18 @@ define i32 @peel_last_with_trip_count_check_lcssa_phi_cmp_not_invar2(i32 %n) {
; CHECK-SAME: i32 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[SUB:%.*]] = add i32 [[N]], -2
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
-; CHECK-NEXT: br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[EXIT_PEEL_BEGIN:.*]]
-; CHECK: [[ENTRY_SPLIT]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY_SPLIT]] ], [ [[IV_NEXT1:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[IV]], [[SUB]]
-; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C]], i32 1, i32 2
-; CHECK-NEXT: call void @foo(i32 [[SEL]])
-; CHECK-NEXT: [[IV_NEXT1]] = add nuw i32 [[IV]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[N]], 1
-; CHECK-NEXT: [[EC1:%.*]] = icmp ne i32 [[IV_NEXT1]], [[TMP2]]
-; CHECK-NEXT: br i1 [[EC1]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], !llvm.loop [[LOOP2:![0-9]+]]
-; CHECK: [[EXIT_PEEL_BEGIN_LOOPEXIT]]:
-; CHECK-NEXT: [[DOTPH:%.*]] = phi i32 [ [[IV_NEXT1]], %[[LOOP]] ]
-; CHECK-NEXT: br label %[[EXIT_PEEL_BEGIN]]
-; CHECK: [[EXIT_PEEL_BEGIN]]:
-; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[DOTPH]], %[[EXIT_PEEL_BEGIN_LOOPEXIT]] ]
-; CHECK-NEXT: br label %[[LOOP_PEEL:.*]]
-; CHECK: [[LOOP_PEEL]]:
+; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[C_PEEL:%.*]] = icmp ne i32 [[TMP3]], [[SUB]]
; CHECK-NEXT: [[SEL_LCSSA:%.*]] = select i1 [[C_PEEL]], i32 1, i32 2
; CHECK-NEXT: call void @foo(i32 [[SEL_LCSSA]])
-; CHECK-NEXT: [[IV_NEXT:%.*]] = add i32 [[TMP3]], 1
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[TMP3]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp ne i32 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]]
-; CHECK: [[EXIT_PEEL_NEXT]]:
-; CHECK-NEXT: br label %[[LOOP_PEEL_NEXT:.*]]
-; CHECK: [[LOOP_PEEL_NEXT]]:
-; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: ret i32 [[SEL_LCSSA]]
+; CHECK-NEXT: [[SEL_LCSSA1:%.*]] = phi i32 [ [[SEL_LCSSA]], %[[LOOP]] ]
+; CHECK-NEXT: ret i32 [[SEL_LCSSA1]]
;
entry:
%sub = add i32 %n, -2
@@ -245,7 +224,7 @@ define i32 @peel_last_with_trip_count_check_lcssa_phi_cmp_not_invar3(i32 %n) {
; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[N]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp ne i32 [[IV_NEXT]], [[TMP1]]
-; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK: [[EXIT_PEEL_BEGIN_LOOPEXIT]]:
; CHECK-NEXT: [[DOTPH:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ]
; CHECK-NEXT: br label %[[EXIT_PEEL_BEGIN]]
@@ -363,7 +342,7 @@ define void @peel_last_with_trip_count_check_nested_loop(i32 %n) {
; CHECK-NEXT: [[IV_NEXT1]] = add nuw i32 [[IV1]], 1
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[N]], 1
; CHECK-NEXT: [[EXITCOND_NOT1:%.*]] = icmp eq i32 [[IV_NEXT1]], [[TMP2]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT1]], label %[[OUTER_HEADER_LOOPEXIT_PEEL_BEGIN_LOOPEXIT]], label %[[INNER_HEADER]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT1]], label %[[OUTER_HEADER_LOOPEXIT_PEEL_BEGIN_LOOPEXIT]], label %[[INNER_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
;
entry:
%sub = add i32 %n, -1
@@ -392,5 +371,4 @@ inner.latch:
; CHECK: [[META1]] = !{!"llvm.loop.peeled.count", i32 1}
; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
-; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]]}
;.
More information about the llvm-commits
mailing list