[llvm] f170550 - [LoopReroll] Allow for multiple loop control only induction vars
Joshua Cao via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 12 21:01:24 PST 2023
Author: Joshua Cao
Date: 2023-01-12T21:00:35-08:00
New Revision: f1705509400f4fbfb168f8f81f8eaa71dc68fa2a
URL: https://github.com/llvm/llvm-project/commit/f1705509400f4fbfb168f8f81f8eaa71dc68fa2a
DIFF: https://github.com/llvm/llvm-project/commit/f1705509400f4fbfb168f8f81f8eaa71dc68fa2a.diff
LOG: [LoopReroll] Allow for multiple loop control only induction vars
Before this, LoopReroll would fail an assertion, falsely assuming that
there can only possibly be a single loop-control-only induction variable.
For example:
```
%a = phi i16 [ %a.next, %for.body ], [ 0, %entry ]
%b = phi i16 [ %b.next, %for.body ], [ 0, %entry ]
%a.next = add nsw i16 %a, -1
%b.next = add nsw i16 %b, -1
%add = add nsw i16 %a, %b
; ... rerollable code
%cmp.not = icmp eq i16 -10, %add
br i1 %cmp.not, label %exit, label %for.body
```
Both %a and %b are valid loop-control-only induction vars.
Additionally, this includes some NFC changes to remove an unnecessary isa<PHINode> check.
Updated the complex_reroll checks.
Differential Revision: https://reviews.llvm.org/D141109
Added:
Modified:
llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
llvm/test/Transforms/LoopReroll/complex_reroll.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
index 626188805e374..a0b3189c7e097 100644
--- a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -191,13 +191,14 @@ namespace {
using SmallInstructionVector = SmallVector<Instruction *, 16>;
using SmallInstructionSet = SmallPtrSet<Instruction *, 16>;
+ using TinyInstructionVector = SmallVector<Instruction *, 1>;
// Map between induction variable and its increment
DenseMap<Instruction *, int64_t> IVToIncMap;
- // For loop with multiple induction variable, remember the one used only to
+ // For loop with multiple induction variables, remember the ones used only to
// control the loop.
- Instruction *LoopControlIV;
+ TinyInstructionVector LoopControlIVs;
// A chain of isomorphic instructions, identified by a single-use PHI
// representing a reduction. Only the last value may be used outside the
@@ -386,10 +387,10 @@ namespace {
TargetLibraryInfo *TLI, DominatorTree *DT, LoopInfo *LI,
bool PreserveLCSSA,
DenseMap<Instruction *, int64_t> &IncrMap,
- Instruction *LoopCtrlIV)
+ TinyInstructionVector LoopCtrlIVs)
: Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), DT(DT), LI(LI),
PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap),
- LoopControlIV(LoopCtrlIV) {}
+ LoopControlIVs(LoopCtrlIVs) {}
/// Stage 1: Find all the DAG roots for the induction variable.
bool findRoots();
@@ -468,7 +469,7 @@ namespace {
// Map between induction variable and its increment
DenseMap<Instruction *, int64_t> &IVToIncMap;
- Instruction *LoopControlIV;
+ TinyInstructionVector LoopControlIVs;
};
// Check if it is a compare-like instruction whose user is a branch
@@ -577,33 +578,28 @@ bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) {
// be possible to reroll the loop.
void LoopReroll::collectPossibleIVs(Loop *L,
SmallInstructionVector &PossibleIVs) {
- BasicBlock *Header = L->getHeader();
- for (BasicBlock::iterator I = Header->begin(),
- IE = Header->getFirstInsertionPt(); I != IE; ++I) {
- if (!isa<PHINode>(I))
- continue;
- if (!I->getType()->isIntegerTy() && !I->getType()->isPointerTy())
+ for (Instruction &IV : L->getHeader()->phis()) {
+ if (!IV.getType()->isIntegerTy() && !IV.getType()->isPointerTy())
continue;
if (const SCEVAddRecExpr *PHISCEV =
- dyn_cast<SCEVAddRecExpr>(SE->getSCEV(&*I))) {
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(&IV))) {
if (PHISCEV->getLoop() != L)
continue;
if (!PHISCEV->isAffine())
continue;
- auto IncSCEV = dyn_cast<SCEVConstant>(PHISCEV->getStepRecurrence(*SE));
+ const auto *IncSCEV = dyn_cast<SCEVConstant>(PHISCEV->getStepRecurrence(*SE));
if (IncSCEV) {
- IVToIncMap[&*I] = IncSCEV->getValue()->getSExtValue();
- LLVM_DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << *PHISCEV
+ IVToIncMap[&IV] = IncSCEV->getValue()->getSExtValue();
+ LLVM_DEBUG(dbgs() << "LRR: Possible IV: " << IV << " = " << *PHISCEV
<< "\n");
- if (isLoopControlIV(L, &*I)) {
- assert(!LoopControlIV && "Found two loop control only IV");
- LoopControlIV = &(*I);
- LLVM_DEBUG(dbgs() << "LRR: Possible loop control only IV: " << *I
+ if (isLoopControlIV(L, &IV)) {
+ LoopControlIVs.push_back(&IV);
+ LLVM_DEBUG(dbgs() << "LRR: Loop control only IV: " << IV
<< " = " << *PHISCEV << "\n");
} else
- PossibleIVs.push_back(&*I);
+ PossibleIVs.push_back(&IV);
}
}
}
@@ -1184,7 +1180,7 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
// Make sure we mark loop-control-only PHIs as used in all iterations. See
// comment above LoopReroll::isLoopControlIV for more information.
BasicBlock *Header = L->getHeader();
- if (LoopControlIV && LoopControlIV != IV) {
+ for (Instruction *LoopControlIV : LoopControlIVs) {
for (auto *U : LoopControlIV->users()) {
Instruction *IVUser = dyn_cast<Instruction>(U);
// IVUser could be loop increment or compare
@@ -1633,7 +1629,7 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
const SCEV *BackedgeTakenCount,
ReductionTracker &Reductions) {
DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, DT, LI, PreserveLCSSA,
- IVToIncMap, LoopControlIV);
+ IVToIncMap, LoopControlIVs);
if (!DAGRoots.findRoots())
return false;
@@ -1676,7 +1672,7 @@ bool LoopReroll::runOnLoop(Loop *L) {
// reroll (there may be several possible options).
SmallInstructionVector PossibleIVs;
IVToIncMap.clear();
- LoopControlIV = nullptr;
+ LoopControlIVs.clear();
collectPossibleIVs(L, PossibleIVs);
if (PossibleIVs.empty()) {
diff --git a/llvm/test/Transforms/LoopReroll/complex_reroll.ll b/llvm/test/Transforms/LoopReroll/complex_reroll.ll
index 8e21e8d145a6f..7c656f5757a00 100644
--- a/llvm/test/Transforms/LoopReroll/complex_reroll.ll
+++ b/llvm/test/Transforms/LoopReroll/complex_reroll.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=loop-reroll %s | FileCheck %s
declare i32 @goo(i32, i32)
@@ -5,21 +6,30 @@ declare i32 @goo(i32, i32)
@aaa = global [16 x i8] c"\01\02\03\04\05\06\07\08\09\0A\0B\0C\0D\0E\0F\10", align 1
define i32 @test1(i32 %len) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
+; CHECK: while.body:
+; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[SUM44_020:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[WHILE_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDVAR]] to i32
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr [16 x i8], [16 x i8]* @aaa, i64 0, i64 [[INDVAR]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* [[SCEVGEP]], align 1
+; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP1]] to i64
+; CHECK-NEXT: [[ADD]] = add i64 [[CONV]], [[SUM44_020]]
+; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP0]], 15
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; CHECK: while.end:
+; CHECK-NEXT: [[ADD9_LCSSA:%.*]] = phi i64 [ [[ADD]], [[WHILE_BODY]] ]
+; CHECK-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD9_LCSSA]] to i32
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @goo(i32 0, i32 [[CONV11]])
+; CHECK-NEXT: unreachable
+;
entry:
br label %while.body
while.body:
-;CHECK-LABEL: while.body:
-;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %entry ]
-;CHECK-NEXT: %sum44.020 = phi i64 [ 0, %entry ], [ %add, %while.body ]
-;CHECK-NEXT: %0 = trunc i64 %indvar to i32
-;CHECK-NEXT: %scevgep = getelementptr [16 x i8], [16 x i8]* @aaa, i64 0, i64 %indvar
-;CHECK-NEXT: [[T2:%[0-9]+]] = load i8, i8* %scevgep, align 1
-;CHECK-NEXT: %conv = zext i8 [[T2]] to i64
-;CHECK-NEXT: %add = add i64 %conv, %sum44.020
-;CHECK-NEXT: %indvar.next = add i64 %indvar, 1
-;CHECK-NEXT: %exitcond = icmp eq i32 %0, 15
-;CHECK-NEXT: br i1 %exitcond, label %while.end, label %while.body
%dec22 = phi i32 [ 4, %entry ], [ %dec, %while.body ]
%buf.021 = phi i8* [ getelementptr inbounds ([16 x i8], [16 x i8]* @aaa, i64 0, i64 0), %entry ], [ %add.ptr, %while.body ]
@@ -51,6 +61,33 @@ while.end: ; preds = %while.body
}
define i32 @test2(i32 %N, i32* nocapture readonly %a, i32 %S) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP_9:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP_9]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK: for.body.lr.ph:
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.cond.for.cond.cleanup_crit_edge:
+; CHECK-NEXT: [[ADD2_LCSSA:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: [[S_ADDR_0_LCSSA:%.*]] = phi i32 [ [[ADD2_LCSSA]], [[FOR_COND_FOR_COND_CLEANUP_CRIT_EDGE:%.*]] ], [ [[S:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret i32 [[S_ADDR_0_LCSSA]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_LR_PH]] ]
+; CHECK-NEXT: [[S_ADDR_011:%.*]] = phi i32 [ [[S]], [[FOR_BODY_LR_PH]] ], [ [[ADD]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[INDVAR]] to i32
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[INDVAR]]
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[SCEVGEP]], align 4
+; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP5]], [[S_ADDR_011]]
+; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP4]], [[TMP3]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_COND_CLEANUP_CRIT_EDGE]], label [[FOR_BODY]]
+;
entry:
%cmp.9 = icmp sgt i32 %N, 0
br i1 %cmp.9, label %for.body.lr.ph, label %for.cond.cleanup
@@ -66,16 +103,6 @@ for.cond.cleanup:
ret i32 %S.addr.0.lcssa
for.body:
-;CHECK-LABEL: for.body:
-;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %for.body.lr.ph ]
-;CHECK-NEXT: %S.addr.011 = phi i32 [ %S, %for.body.lr.ph ], [ %add, %for.body ]
-;CHECK-NEXT: %4 = trunc i64 %indvar to i32
-;CHECK-NEXT: %scevgep = getelementptr i32, i32* %a, i64 %indvar
-;CHECK-NEXT: %5 = load i32, i32* %scevgep, align 4
-;CHECK-NEXT: %add = add nsw i32 %5, %S.addr.011
-;CHECK-NEXT: %indvar.next = add i64 %indvar, 1
-;CHECK-NEXT: %exitcond = icmp eq i32 %4, %3
-;CHECK-NEXT: br i1 %exitcond, label %for.cond.for.cond.cleanup_crit_edge, label %for.body
%i.012 = phi i32 [ 0, %for.body.lr.ph ], [ %add3, %for.body ]
%S.addr.011 = phi i32 [ %S, %for.body.lr.ph ], [ %add2, %for.body ]
@@ -92,6 +119,34 @@ for.body:
}
define i32 @test3(i32* nocapture readonly %buf, i32 %len) #0 {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[LEN:%.*]], 1
+; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY_PREHEADER:%.*]], label [[WHILE_END:%.*]]
+; CHECK: while.body.preheader:
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], -2
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1
+; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
+; CHECK: while.body:
+; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[S_012:%.*]] = phi i32 [ [[ADD:%.*]], [[WHILE_BODY]] ], [ undef, [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[INDVAR]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = mul nsw i64 [[INDVAR]], -1
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[BUF:%.*]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[SCEVGEP]], align 4
+; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP6]], [[S_012]]
+; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP4]], [[TMP3]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
+; CHECK: while.end.loopexit:
+; CHECK-NEXT: [[ADD2_LCSSA:%.*]] = phi i32 [ [[ADD]], [[WHILE_BODY]] ]
+; CHECK-NEXT: br label [[WHILE_END]]
+; CHECK: while.end:
+; CHECK-NEXT: [[S_0_LCSSA:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[ADD2_LCSSA]], [[WHILE_END_LOOPEXIT]] ]
+; CHECK-NEXT: ret i32 [[S_0_LCSSA]]
+;
entry:
%cmp10 = icmp sgt i32 %len, 1
br i1 %cmp10, label %while.body.preheader, label %while.end
@@ -100,17 +155,6 @@ while.body.preheader: ; preds = %entry
br label %while.body
while.body: ; preds = %while.body.preheader, %while.body
-;CHECK-LABEL: while.body:
-;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %while.body.preheader ]
-;CHECK-NEXT: %S.012 = phi i32 [ %add, %while.body ], [ undef, %while.body.preheader ]
-;CHECK-NEXT: %4 = trunc i64 %indvar to i32
-;CHECK-NEXT: %5 = mul nsw i64 %indvar, -1
-;CHECK-NEXT: %scevgep = getelementptr i32, i32* %buf, i64 %5
-;CHECK-NEXT: %6 = load i32, i32* %scevgep, align 4
-;CHECK-NEXT: %add = add nsw i32 %6, %S.012
-;CHECK-NEXT: %indvar.next = add i64 %indvar, 1
-;CHECK-NEXT: %exitcond = icmp eq i32 %4, %3
-;CHECK-NEXT: br i1 %exitcond, label %while.end.loopexit, label %while.body
%i.013 = phi i32 [ %sub, %while.body ], [ %len, %while.body.preheader ]
%S.012 = phi i32 [ %add2, %while.body ], [ undef, %while.body.preheader ]
@@ -133,3 +177,60 @@ while.end: ; preds = %while.end.loopexit,
ret i32 %S.0.lcssa
}
+define i32 @test4(i32 %len) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
+; CHECK: while.body:
+; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[SUM44_020:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[WHILE_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDVAR]] to i32
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr [16 x i8], [16 x i8]* @aaa, i64 0, i64 [[INDVAR]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* [[SCEVGEP]], align 1
+; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP1]] to i64
+; CHECK-NEXT: [[ADD]] = add i64 [[CONV]], [[SUM44_020]]
+; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP0]], 23
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; CHECK: while.end:
+; CHECK-NEXT: [[ADD9_LCSSA:%.*]] = phi i64 [ [[ADD]], [[WHILE_BODY]] ]
+; CHECK-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD9_LCSSA]] to i32
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @goo(i32 0, i32 [[CONV11]])
+; CHECK-NEXT: unreachable
+;
+entry:
+ br label %while.body
+
+while.body:
+ %a = phi i32 [ 4, %entry ], [ %a.next, %while.body ]
+ %b = phi i32 [ 6, %entry ], [ %b.next, %while.body ]
+ %buf.021 = phi i8* [ getelementptr inbounds ([16 x i8], [16 x i8]* @aaa, i64 0, i64 0), %entry ], [ %add.ptr, %while.body ]
+ %sum44.020 = phi i64 [ 0, %entry ], [ %add9, %while.body ]
+ %0 = load i8, i8* %buf.021, align 1
+ %conv = zext i8 %0 to i64
+ %add = add i64 %conv, %sum44.020
+ %arrayidx1 = getelementptr inbounds i8, i8* %buf.021, i64 1
+ %1 = load i8, i8* %arrayidx1, align 1
+ %conv2 = zext i8 %1 to i64
+ %add3 = add i64 %add, %conv2
+ %arrayidx4 = getelementptr inbounds i8, i8* %buf.021, i64 2
+ %2 = load i8, i8* %arrayidx4, align 1
+ %conv5 = zext i8 %2 to i64
+ %add6 = add i64 %add3, %conv5
+ %arrayidx7 = getelementptr inbounds i8, i8* %buf.021, i64 3
+ %3 = load i8, i8* %arrayidx7, align 1
+ %conv8 = zext i8 %3 to i64
+ %add9 = add i64 %add6, %conv8
+ %add.ptr = getelementptr inbounds i8, i8* %buf.021, i64 4
+ %a.next = add nsw i32 %a, -1
+ %b.next = add nsw i32 %b, -1
+ %cond = add nsw i32 %a, %b
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %while.end, label %while.body
+
+while.end: ; preds = %while.body
+ %conv11 = trunc i64 %add9 to i32
+ %call = tail call i32 @goo(i32 0, i32 %conv11)
+ unreachable
+}
+
More information about the llvm-commits
mailing list