[PATCH] D97219: [LSR] Unify scheduling of existing and inserted addrecs
Philip Reames via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 24 12:56:52 PST 2021
reames updated this revision to Diff 326185.
reames added a comment.
address review comments
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D97219/new/
https://reviews.llvm.org/D97219
Files:
llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
llvm/test/Transforms/LoopStrengthReduce/post-increment-insertion.ll
Index: llvm/test/Transforms/LoopStrengthReduce/post-increment-insertion.ll
===================================================================
--- llvm/test/Transforms/LoopStrengthReduce/post-increment-insertion.ll
+++ llvm/test/Transforms/LoopStrengthReduce/post-increment-insertion.ll
@@ -13,13 +13,13 @@
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[LEN:%.*]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
; CHECK-NEXT: [[COND_1:%.*]] = icmp eq i64 [[IV]], 0
; CHECK-NEXT: br i1 [[COND_1]], label [[EXIT:%.*]], label [[BACKEDGE]]
; CHECK: backedge:
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i32, i32* [[SCEVGEP]], i64 [[IV]]
; CHECK-NEXT: [[LOADED:%.*]] = load atomic i32, i32* [[SCEVGEP1]] unordered, align 4
; CHECK-NEXT: [[COND_2:%.*]] = icmp eq i32 [[LOADED]], [[X:%.*]]
+; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
; CHECK-NEXT: br i1 [[COND_2]], label [[FAILURE:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: ret i32 -1
@@ -145,13 +145,13 @@
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[LEN:%.*]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[IV_NEXT]] = sub i64 [[IV]], 1
; CHECK-NEXT: [[COND_1:%.*]] = icmp eq i64 [[IV]], 0
; CHECK-NEXT: br i1 [[COND_1]], label [[EXIT:%.*]], label [[BACKEDGE]]
; CHECK: backedge:
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i32, i32* [[SCEVGEP]], i64 [[IV]]
; CHECK-NEXT: [[LOADED:%.*]] = load atomic i32, i32* [[SCEVGEP1]] unordered, align 4
; CHECK-NEXT: [[COND_2:%.*]] = icmp eq i32 [[LOADED]], [[X:%.*]]
+; CHECK-NEXT: [[IV_NEXT]] = sub i64 [[IV]], 1
; CHECK-NEXT: br i1 [[COND_2]], label [[FAILURE:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: ret i32 -1
Index: llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -5561,6 +5561,46 @@
Changed |= RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts,
&TLI, MSSAU);
+
+ // In our cost analysis above, we assume that each addrec consumes exactly
+ // one register, and arrange to have increments inserted just before the
+ // latch to maximimize the chance this is true. However, if we reused
+ // existing IVs, we now need to move the increments to match our
+ // expectations. Otherwise, our cost modeling results in us having a
+ // chosen a non-optimal result for the actual schedule. (And yes, this
+ // scheduling decision does impact later codegen.)
+ for (PHINode &PN : L->getHeader()->phis()) {
+ // First, filter out anything not an obvious addrec
+ if (!SE.isSCEVable(PN.getType()) || !isa<SCEVAddRecExpr>(SE.getSCEV(&PN)))
+ continue;
+ Instruction *IncV =
+ dyn_cast<Instruction>(PN.getIncomingValueForBlock(L->getLoopLatch()));
+ if (!IncV)
+ continue;
+
+ if (IncV->getOpcode() != Instruction::Add &&
+ IncV->getOpcode() != Instruction::Sub)
+ continue;
+
+ if (IncV->getOperand(0) != &PN &&
+ !isa<Constant>(IncV->getOperand(1)))
+ // If not a constant step, might increase register pressure
+ // (We assume constants have been canonicalized to RHS)
+ continue;
+
+ if (IncV->getParent() == IVIncInsertPos->getParent())
+ // Only bother moving across blocks. Isel can handle block local case.
+ continue;
+
+ // Can we legally schedule inc at the desired point?
+ if (!llvm::all_of(IncV->uses(),
+ [&](Use &U) {return DT.dominates(IVIncInsertPos, U);}))
+ continue;
+ IncV->moveBefore(IVIncInsertPos);
+ Changed = true;
+ }
+
+
}
LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D97219.326185.patch
Type: text/x-patch
Size: 4034 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210224/46ce80c6/attachment.bin>
More information about the llvm-commits
mailing list