[llvm] [LoopPeel] Support last iteration peeling of min/max intrinsics (PR #143598)
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 10 13:26:56 PDT 2025
https://github.com/preames created https://github.com/llvm/llvm-project/pull/143598
This isn't terribly useful at the moment because of the step=1 restriction, but it should be functionally sound. This is mostly just making sure the code paths don't diverge as we make other changes.
From 63dae7e1029b2a145f0ecf6530073a7a0935565e Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Tue, 10 Jun 2025 13:19:48 -0700
Subject: [PATCH] [LoopPeel] Support last iteration peeling of min/max
intrinsics
This isn't terribly useful at the moment because of the step=1 restriction
but it should be functionally sound. This is mostly just making sure the
codepaths don't diverge as we make other changes.
---
llvm/lib/Transforms/Utils/LoopPeel.cpp | 5 +-
.../LoopUnroll/peel-last-iteration-minmax.ll | 48 +++++++++++++++----
2 files changed, 42 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index f343962548259..27e70c5ddc0fc 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -545,8 +545,11 @@ countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE,
const SCEV *IterVal = AddRec->evaluateAtIteration(
SE.getConstant(AddRec->getType(), NewPeelCount), SE);
if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, BoundSCEV, Step,
- Pred))
+ Pred)) {
+ if (shouldPeelLastIteration(L, Pred, AddRec, BoundSCEV, SE, TTI))
+ DesiredPeelCountLast = 1;
return;
+ }
DesiredPeelCount = NewPeelCount;
};
diff --git a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-minmax.ll b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-minmax.ll
index cd098e123b5f6..5e8540814fff2 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-minmax.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-minmax.ll
@@ -41,16 +41,27 @@ define i32 @smin_unit_step() {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT1:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: call void @foo(i32 1)
+; CHECK-NEXT: [[IV_NEXT1]] = add nuw nsw i32 [[IV1]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp ne i32 [[IV_NEXT1]], 1023
+; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[EXIT_PEEL_BEGIN]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT1]], %[[LOOP]] ]
+; CHECK-NEXT: br label %[[LOOP_PEEL:.*]]
+; CHECK: [[LOOP_PEEL]]:
; CHECK-NEXT: [[SUB:%.*]] = sub i32 1024, [[IV]]
; CHECK-NEXT: [[MINMAX:%.*]] = call i32 @llvm.smin.i32(i32 [[SUB]], i32 1)
; CHECK-NEXT: call void @foo(i32 [[MINMAX]])
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i32 [[IV]], 1
; CHECK-NEXT: [[EC_PEEL:%.*]] = icmp ne i32 [[IV_NEXT]], 1024
-; CHECK-NEXT: br i1 [[EC_PEEL]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK-NEXT: br i1 [[EC_PEEL]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]]
+; CHECK: [[EXIT_PEEL_NEXT]]:
+; CHECK-NEXT: br label %[[LOOP_PEEL_NEXT:.*]]
+; CHECK: [[LOOP_PEEL_NEXT]]:
+; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[MINMAX_LCSSA:%.*]] = phi i32 [ [[MINMAX]], %[[LOOP]] ]
-; CHECK-NEXT: ret i32 [[MINMAX_LCSSA]]
+; CHECK-NEXT: ret i32 [[MINMAX]]
;
entry:
br label %loop
@@ -74,16 +85,28 @@ define i32 @smax_unit_step() {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT1:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[SUB1:%.*]] = sub nuw nsw i32 1024, [[IV1]]
+; CHECK-NEXT: call void @foo(i32 [[SUB1]])
+; CHECK-NEXT: [[IV_NEXT1]] = add nuw nsw i32 [[IV1]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp ne i32 [[IV_NEXT1]], 1023
+; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK: [[EXIT_PEEL_BEGIN]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT1]], %[[LOOP]] ]
+; CHECK-NEXT: br label %[[LOOP_PEEL:.*]]
+; CHECK: [[LOOP_PEEL]]:
; CHECK-NEXT: [[SUB:%.*]] = sub i32 1024, [[IV]]
; CHECK-NEXT: [[MINMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[SUB]], i32 1)
; CHECK-NEXT: call void @foo(i32 [[MINMAX]])
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i32 [[IV]], 1
; CHECK-NEXT: [[EC_PEEL:%.*]] = icmp ne i32 [[IV_NEXT]], 1024
-; CHECK-NEXT: br i1 [[EC_PEEL]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK-NEXT: br i1 [[EC_PEEL]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]]
+; CHECK: [[EXIT_PEEL_NEXT]]:
+; CHECK-NEXT: br label %[[LOOP_PEEL_NEXT:.*]]
+; CHECK: [[LOOP_PEEL_NEXT]]:
+; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[MINMAX_LCSSA:%.*]] = phi i32 [ [[MINMAX]], %[[LOOP]] ]
-; CHECK-NEXT: ret i32 [[MINMAX_LCSSA]]
+; CHECK-NEXT: ret i32 [[MINMAX]]
;
entry:
br label %loop
@@ -135,3 +158,8 @@ exit:
ret i32 %minmax.lcssa
}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.peeled.count", i32 1}
+; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]}
+;.
More information about the llvm-commits
mailing list