[llvm] [LoopPeel] Support min/max intrinsics in loop peeling (PR #93162)
via llvm-commits
llvm-commits at lists.llvm.org
Thu May 23 02:52:55 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Sergey Kachkov (skachkov-sc)
<details>
<summary>Changes</summary>
Motivating example: https://godbolt.org/z/qfxz8rKac
This example shows very different loop peeling behaviour for similar code depending on whether icmp+select was canonicalized to a min/max intrinsic or not.
This patch adds processing of min/max intrinsics in LoopPeel in a similar way to how conditional statements are already handled: for min/max(IterVal, BoundVal) we peel iterations where IterVal < BoundVal for monotonically increasing IterVal; for monotonically decreasing IterVal we peel iterations where IterVal > BoundVal (strict comparison predicates are used to minimize the number of peeled iterations).
Compile-time results on the llvm test-suite (including SPEC benchmarks) show 3 tests affected by this:
```
Program loop-peel.NumPeeled
baseline loop-peel-min-max diff
test-suite :: MultiSource/Benchmarks/mediabench/mpeg2/mpeg2dec/mpeg2decode.test 0.00 2.00 inf%
test-suite :: MultiSource/Applications/JM/lencod/lencod.test 7.00 9.00 28.6%
test-suite :: External/SPEC/CINT2006/464.h264ref/464.h264ref.test 6.00 7.00 16.7%
```
---
Full diff: https://github.com/llvm/llvm-project/pull/93162.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Utils/LoopPeel.cpp (+49)
- (added) llvm/test/Transforms/LoopUnroll/peel-loop-min-max-intrinsics.ll (+288)
``````````diff
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index f76fa3bb6c611..a6d37b9bdf11d 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -449,10 +449,59 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
DesiredPeelCount = std::max(DesiredPeelCount, NewPeelCount);
};
+ auto ComputePeelCountMinMax = [&](IntrinsicInst *II) {
+ bool IsSigned;
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::smax:
+ case Intrinsic::smin:
+ IsSigned = true;
+ break;
+ case Intrinsic::umax:
+ case Intrinsic::umin:
+ IsSigned = false;
+ break;
+ default:
+ return;
+ }
+ Value *LHS = II->getOperand(0), *RHS = II->getOperand(1);
+ const SCEV *BoundSCEV, *IterSCEV;
+ if (L.isLoopInvariant(LHS)) {
+ BoundSCEV = SE.getSCEV(LHS);
+ IterSCEV = SE.getSCEV(RHS);
+ } else if (L.isLoopInvariant(RHS)) {
+ BoundSCEV = SE.getSCEV(RHS);
+ IterSCEV = SE.getSCEV(LHS);
+ } else
+ return;
+ const auto *AddRec = dyn_cast<SCEVAddRecExpr>(IterSCEV);
+ // For simplicity, we support only affine recurrences.
+ if (!AddRec || !AddRec->isAffine() || AddRec->getLoop() != &L)
+ return;
+ const SCEV *Step = AddRec->getStepRecurrence(SE);
+ // To minimize number of peeled iterations, we use strict relational
+ // predicates here.
+ ICmpInst::Predicate Pred;
+ if (SE.isKnownPositive(Step))
+ Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+ else if (SE.isKnownNegative(Step))
+ Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ else
+ return;
+ const SCEV *IterVal = AddRec->evaluateAtIteration(
+ SE.getConstant(AddRec->getType(), DesiredPeelCount), SE);
+ while (DesiredPeelCount < MaxPeelCount &&
+ SE.isKnownPredicate(Pred, IterVal, BoundSCEV)) {
+ IterVal = SE.getAddExpr(IterVal, Step);
+ ++DesiredPeelCount;
+ }
+ };
+
for (BasicBlock *BB : L.blocks()) {
for (Instruction &I : *BB) {
if (SelectInst *SI = dyn_cast<SelectInst>(&I))
ComputePeelCount(SI->getCondition(), 0);
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
+ ComputePeelCountMinMax(II);
}
auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-min-max-intrinsics.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-min-max-intrinsics.ll
new file mode 100644
index 0000000000000..1a3f60b482793
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/peel-loop-min-max-intrinsics.ll
@@ -0,0 +1,288 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -S -passes=loop-unroll -unroll-peel-max-count=2 | FileCheck %s
+
+declare void @foo(i32)
+
+declare i32 @llvm.smin.i32(i32, i32)
+declare i32 @llvm.smax.i32(i32, i32)
+declare i32 @llvm.umin.i32(i32, i32)
+declare i32 @llvm.umax.i32(i32, i32)
+
+define void @test1(i32 %N) {
+; CHECK-LABEL: define void @test1(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP5_NOT:%.*]] = icmp eq i32 [[N]], 0
+; CHECK-NEXT: br i1 [[CMP5_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]]
+; CHECK: for.body.peel.begin:
+; CHECK-NEXT: br label [[FOR_BODY_PEEL:%.*]]
+; CHECK: for.body.peel:
+; CHECK-NEXT: [[COND_PEEL:%.*]] = tail call i32 @llvm.umin.i32(i32 0, i32 2)
+; CHECK-NEXT: tail call void @foo(i32 [[COND_PEEL]])
+; CHECK-NEXT: [[INC_PEEL:%.*]] = add nuw i32 0, 1
+; CHECK-NEXT: [[EXITCOND_NOT_PEEL:%.*]] = icmp eq i32 [[INC_PEEL]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT_PEEL]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY_PEEL_NEXT:%.*]]
+; CHECK: for.body.peel.next:
+; CHECK-NEXT: br label [[FOR_BODY_PEEL2:%.*]]
+; CHECK: for.body.peel2:
+; CHECK-NEXT: [[COND_PEEL3:%.*]] = tail call i32 @llvm.umin.i32(i32 [[INC_PEEL]], i32 2)
+; CHECK-NEXT: tail call void @foo(i32 [[COND_PEEL3]])
+; CHECK-NEXT: [[INC_PEEL4:%.*]] = add nuw i32 [[INC_PEEL]], 1
+; CHECK-NEXT: [[EXITCOND_NOT_PEEL5:%.*]] = icmp eq i32 [[INC_PEEL4]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT_PEEL5]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY_PEEL_NEXT1:%.*]]
+; CHECK: for.body.peel.next1:
+; CHECK-NEXT: br label [[FOR_BODY_PEEL_NEXT6:%.*]]
+; CHECK: for.body.peel.next6:
+; CHECK-NEXT: br label [[FOR_BODY_PREHEADER_PEEL_NEWPH:%.*]]
+; CHECK: for.body.preheader.peel.newph:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[INC_PEEL4]], [[FOR_BODY_PREHEADER_PEEL_NEWPH]] ]
+; CHECK-NEXT: tail call void @foo(i32 2)
+; CHECK-NEXT: [[INC]] = add nuw i32 [[I_06]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_LOOPEXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: for.cond.cleanup.loopexit.loopexit:
+; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT]]
+; CHECK: for.cond.cleanup.loopexit:
+; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret void
+;
+entry:
+ %cmp5.not = icmp eq i32 %N, 0
+ br i1 %cmp5.not, label %for.cond.cleanup, label %for.body
+
+for.body:
+ %i.06 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %cond = tail call i32 @llvm.umin.i32(i32 %i.06, i32 2)
+ tail call void @foo(i32 %cond)
+ %inc = add nuw i32 %i.06, 1
+ %exitcond.not = icmp eq i32 %inc, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret void
+}
+
+define void @test2(i32 %N) {
+; CHECK-LABEL: define void @test2(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP5_NOT:%.*]] = icmp eq i32 [[N]], 0
+; CHECK-NEXT: br i1 [[CMP5_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]]
+; CHECK: for.body.peel.begin:
+; CHECK-NEXT: br label [[FOR_BODY_PEEL:%.*]]
+; CHECK: for.body.peel:
+; CHECK-NEXT: [[COND_PEEL:%.*]] = tail call i32 @llvm.umax.i32(i32 0, i32 2)
+; CHECK-NEXT: tail call void @foo(i32 [[COND_PEEL]])
+; CHECK-NEXT: [[INC_PEEL:%.*]] = add nuw i32 0, 1
+; CHECK-NEXT: [[EXITCOND_NOT_PEEL:%.*]] = icmp eq i32 [[INC_PEEL]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT_PEEL]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY_PEEL_NEXT:%.*]]
+; CHECK: for.body.peel.next:
+; CHECK-NEXT: br label [[FOR_BODY_PEEL2:%.*]]
+; CHECK: for.body.peel2:
+; CHECK-NEXT: [[COND_PEEL3:%.*]] = tail call i32 @llvm.umax.i32(i32 [[INC_PEEL]], i32 2)
+; CHECK-NEXT: tail call void @foo(i32 [[COND_PEEL3]])
+; CHECK-NEXT: [[INC_PEEL4:%.*]] = add nuw i32 [[INC_PEEL]], 1
+; CHECK-NEXT: [[EXITCOND_NOT_PEEL5:%.*]] = icmp eq i32 [[INC_PEEL4]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT_PEEL5]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY_PEEL_NEXT1:%.*]]
+; CHECK: for.body.peel.next1:
+; CHECK-NEXT: br label [[FOR_BODY_PEEL_NEXT6:%.*]]
+; CHECK: for.body.peel.next6:
+; CHECK-NEXT: br label [[FOR_BODY_PREHEADER_PEEL_NEWPH:%.*]]
+; CHECK: for.body.preheader.peel.newph:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[INC_PEEL4]], [[FOR_BODY_PREHEADER_PEEL_NEWPH]] ]
+; CHECK-NEXT: tail call void @foo(i32 [[I_06]])
+; CHECK-NEXT: [[INC]] = add nuw i32 [[I_06]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_LOOPEXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK: for.cond.cleanup.loopexit.loopexit:
+; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT]]
+; CHECK: for.cond.cleanup.loopexit:
+; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret void
+;
+entry:
+ %cmp5.not = icmp eq i32 %N, 0
+ br i1 %cmp5.not, label %for.cond.cleanup, label %for.body
+
+for.body:
+ %i.06 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %cond = tail call i32 @llvm.umax.i32(i32 %i.06, i32 2)
+ tail call void @foo(i32 %cond)
+ %inc = add nuw i32 %i.06, 1
+ %exitcond.not = icmp eq i32 %inc, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret void
+}
+
+define void @test3(i32 %N) {
+; CHECK-LABEL: define void @test3(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP5:%.*]] = icmp slt i32 [[N]], 0
+; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]]
+; CHECK: for.body.peel.begin:
+; CHECK-NEXT: br label [[FOR_BODY_PEEL:%.*]]
+; CHECK: for.body.peel:
+; CHECK-NEXT: [[COND_PEEL:%.*]] = tail call i32 @llvm.smax.i32(i32 0, i32 -2)
+; CHECK-NEXT: tail call void @foo(i32 [[COND_PEEL]])
+; CHECK-NEXT: [[DEC_PEEL:%.*]] = add nsw i32 0, -1
+; CHECK-NEXT: [[CMP_PEEL:%.*]] = icmp sgt i32 [[DEC_PEEL]], [[N]]
+; CHECK-NEXT: br i1 [[CMP_PEEL]], label [[FOR_BODY_PEEL_NEXT:%.*]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
+; CHECK: for.body.peel.next:
+; CHECK-NEXT: br label [[FOR_BODY_PEEL2:%.*]]
+; CHECK: for.body.peel2:
+; CHECK-NEXT: [[COND_PEEL3:%.*]] = tail call i32 @llvm.smax.i32(i32 [[DEC_PEEL]], i32 -2)
+; CHECK-NEXT: tail call void @foo(i32 [[COND_PEEL3]])
+; CHECK-NEXT: [[DEC_PEEL4:%.*]] = add nsw i32 [[DEC_PEEL]], -1
+; CHECK-NEXT: [[CMP_PEEL5:%.*]] = icmp sgt i32 [[DEC_PEEL4]], [[N]]
+; CHECK-NEXT: br i1 [[CMP_PEEL5]], label [[FOR_BODY_PEEL_NEXT1:%.*]], label [[FOR_COND_CLEANUP_LOOPEXIT]]
+; CHECK: for.body.peel.next1:
+; CHECK-NEXT: br label [[FOR_BODY_PEEL_NEXT6:%.*]]
+; CHECK: for.body.peel.next6:
+; CHECK-NEXT: br label [[FOR_BODY_PREHEADER_PEEL_NEWPH:%.*]]
+; CHECK: for.body.preheader.peel.newph:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[DEC:%.*]], [[FOR_BODY]] ], [ [[DEC_PEEL4]], [[FOR_BODY_PREHEADER_PEEL_NEWPH]] ]
+; CHECK-NEXT: tail call void @foo(i32 -2)
+; CHECK-NEXT: [[DEC]] = add nsw i32 [[I_06]], -1
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[DEC]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: for.cond.cleanup.loopexit.loopexit:
+; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT]]
+; CHECK: for.cond.cleanup.loopexit:
+; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret void
+;
+entry:
+ %cmp5 = icmp slt i32 %N, 0
+ br i1 %cmp5, label %for.body, label %for.cond.cleanup
+
+for.body:
+ %i.06 = phi i32 [ %dec, %for.body ], [ 0, %entry ]
+ %cond = tail call i32 @llvm.smax.i32(i32 %i.06, i32 -2)
+ tail call void @foo(i32 %cond)
+ %dec = add nsw i32 %i.06, -1
+ %cmp = icmp sgt i32 %dec, %N
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+}
+
+define void @test4(i32 %N) {
+; CHECK-LABEL: define void @test4(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP5:%.*]] = icmp slt i32 [[N]], 0
+; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]]
+; CHECK: for.body.peel.begin:
+; CHECK-NEXT: br label [[FOR_BODY_PEEL:%.*]]
+; CHECK: for.body.peel:
+; CHECK-NEXT: [[COND_PEEL:%.*]] = tail call i32 @llvm.smin.i32(i32 0, i32 -2)
+; CHECK-NEXT: tail call void @foo(i32 noundef signext [[COND_PEEL]])
+; CHECK-NEXT: [[DEC_PEEL:%.*]] = add nsw i32 0, -1
+; CHECK-NEXT: [[CMP_PEEL:%.*]] = icmp sgt i32 [[DEC_PEEL]], [[N]]
+; CHECK-NEXT: br i1 [[CMP_PEEL]], label [[FOR_BODY_PEEL_NEXT:%.*]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
+; CHECK: for.body.peel.next:
+; CHECK-NEXT: br label [[FOR_BODY_PEEL2:%.*]]
+; CHECK: for.body.peel2:
+; CHECK-NEXT: [[COND_PEEL3:%.*]] = tail call i32 @llvm.smin.i32(i32 [[DEC_PEEL]], i32 -2)
+; CHECK-NEXT: tail call void @foo(i32 noundef signext [[COND_PEEL3]])
+; CHECK-NEXT: [[DEC_PEEL4:%.*]] = add nsw i32 [[DEC_PEEL]], -1
+; CHECK-NEXT: [[CMP_PEEL5:%.*]] = icmp sgt i32 [[DEC_PEEL4]], [[N]]
+; CHECK-NEXT: br i1 [[CMP_PEEL5]], label [[FOR_BODY_PEEL_NEXT1:%.*]], label [[FOR_COND_CLEANUP_LOOPEXIT]]
+; CHECK: for.body.peel.next1:
+; CHECK-NEXT: br label [[FOR_BODY_PEEL_NEXT6:%.*]]
+; CHECK: for.body.peel.next6:
+; CHECK-NEXT: br label [[FOR_BODY_PREHEADER_PEEL_NEWPH:%.*]]
+; CHECK: for.body.preheader.peel.newph:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[DEC:%.*]], [[FOR_BODY]] ], [ [[DEC_PEEL4]], [[FOR_BODY_PREHEADER_PEEL_NEWPH]] ]
+; CHECK-NEXT: tail call void @foo(i32 noundef signext [[I_06]])
+; CHECK-NEXT: [[DEC]] = add nsw i32 [[I_06]], -1
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[DEC]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: for.cond.cleanup.loopexit.loopexit:
+; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT]]
+; CHECK: for.cond.cleanup.loopexit:
+; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret void
+;
+entry:
+ %cmp5 = icmp slt i32 %N, 0
+ br i1 %cmp5, label %for.body, label %for.cond.cleanup
+
+for.body:
+ %i.06 = phi i32 [ %dec, %for.body ], [ 0, %entry ]
+ %cond = tail call i32 @llvm.smin.i32(i32 %i.06, i32 -2)
+ tail call void @foo(i32 noundef signext %cond)
+ %dec = add nsw i32 %i.06, -1
+ %cmp = icmp sgt i32 %dec, %N
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+}
+
+define void @test_negative(i32 %End, i32 %Step) {
+; CHECK-LABEL: define void @test_negative(
+; CHECK-SAME: i32 [[END:%.*]], i32 [[STEP:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP_NOT5:%.*]] = icmp eq i32 [[END]], 0
+; CHECK-NEXT: br i1 [[CMP_NOT5]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[COND:%.*]] = tail call i32 @llvm.smin.i32(i32 [[I_06]], i32 2)
+; CHECK-NEXT: tail call void @foo(i32 [[COND]])
+; CHECK-NEXT: [[ADD]] = add nsw i32 [[I_06]], [[STEP]]
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[ADD]], [[END]]
+; CHECK-NEXT: br i1 [[CMP_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK: for.cond.cleanup.loopexit:
+; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret void
+;
+entry:
+ %cmp.not5 = icmp eq i32 %End, 0
+ br i1 %cmp.not5, label %for.cond.cleanup, label %for.body
+
+for.body:
+ %i.06 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %cond = tail call i32 @llvm.smin.i32(i32 %i.06, i32 2)
+ tail call void @foo(i32 %cond)
+ %add = add nsw i32 %i.06, %Step
+ %cmp.not = icmp eq i32 %add, %End
+ br i1 %cmp.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret void
+}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.peeled.count", i32 2}
+; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
+; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]]}
+;.
``````````
</details>
https://github.com/llvm/llvm-project/pull/93162
More information about the llvm-commits
mailing list