[llvm] [LSR] Don't count conditional loads/stores as enabling pre/post-index (PR #159573)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 18 06:29:34 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: John Brawn (john-brawn-arm)
<details>
<summary>Changes</summary>
When a load/store is conditionally executed in a loop, it isn't a candidate for pre/post-index addressing, as the address increment would only happen on those loop iterations where the load/store is executed.
Detect this and only discount the AddRec cost when the load/store is unconditional.
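For illustration, here is a minimal C++ sketch (not taken from the patch; it only loosely mirrors the `conditional_load` test updated below) of the kind of loop this affects:

```c++
// Hypothetical example: q[i] is only loaded when p[i] is non-zero, so a
// post-incremented pointer for q would only be bumped on the iterations that
// take the branch and would fall out of sync with i. The unconditional load
// of p[i] remains a valid candidate for post-index addressing.
int conditional_sum(const int *p, const int *q, long n) {
  int ret = 0;
  for (long i = 0; i < n; ++i) {
    if (p[i] != 0)   // executes on every iteration
      ret += q[i];   // executes only on some iterations
  }
  return ret;
}
```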
---
Full diff: https://github.com/llvm/llvm-project/pull/159573.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp (+27-5)
- (modified) llvm/test/Transforms/LoopStrengthReduce/AArch64/prefer-all.ll (+2-4)
``````````diff
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 1a279b6198182..22ab5820b7ec1 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1278,6 +1278,7 @@ struct LSRFixup {
LSRFixup() = default;
bool isUseFullyOutsideLoop(const Loop *L) const;
+ bool isUseUnconditional(const Loop *L) const;
void print(raw_ostream &OS) const;
void dump() const;
@@ -1318,6 +1319,11 @@ class LSRUse {
/// the loop, in which case some special-case heuristics may be used.
bool AllFixupsOutsideLoop = true;
+ /// This records whether all of the fixups using this LSRUse are unconditional
+ /// within the loop, meaning they will be executed in every iteration of the
+ /// loop.
+ bool AllFixupsUnconditional = true;
+
/// RigidFormula is set to true to guarantee that this use will be associated
/// with a single formula--the one that initially matched. Some SCEV
/// expressions cannot be expanded. This allows LSR to consider the registers
@@ -1422,15 +1428,19 @@ void Cost::RateRegister(const Formula &F, const SCEV *Reg,
TTI->isIndexedStoreLegal(TTI->MIM_PostInc, AR->getType())) {
const SCEV *Start;
const SCEVConstant *Step;
- if (match(AR, m_scev_AffineAddRec(m_SCEV(Start), m_SCEVConstant(Step))))
+ if (match(AR, m_scev_AffineAddRec(m_SCEV(Start), m_SCEVConstant(Step)))) {
// If the step size matches the base offset, we could use pre-indexed
// addressing.
- if (((AMK & TTI::AMK_PreIndexed) && F.BaseOffset.isFixed() &&
- Step->getAPInt() == F.BaseOffset.getFixedValue()) ||
- ((AMK & TTI::AMK_PostIndexed) && !isa<SCEVConstant>(Start) &&
- SE->isLoopInvariant(Start, L)))
+ bool CanPreIndex = (AMK & TTI::AMK_PreIndexed) && F.BaseOffset.isFixed() &&
+ Step->getAPInt() == F.BaseOffset.getFixedValue();
+ bool CanPostIndex = (AMK & TTI::AMK_PostIndexed) && !isa<SCEVConstant>(Start) &&
+ SE->isLoopInvariant(Start, L);
+ // We can only pre or post index when the load/store is unconditional.
+ if ((CanPreIndex || CanPostIndex) && LU.AllFixupsUnconditional)
LoopCost = 0;
+ }
}
+
// If the loop counts down to zero and we'll be using a hardware loop then
// the addrec will be combined into the hardware loop instruction.
if (LU.Kind == LSRUse::ICmpZero && F.countsDownToZero() &&
@@ -1647,6 +1657,12 @@ bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
return !L->contains(UserInst);
}
+/// Test whether this fixup is for an instruction that's unconditional, i.e.
+/// it's executed in every loop iteration.
+bool LSRFixup::isUseUnconditional(const Loop *L) const {
+ return isGuaranteedToExecuteForEveryIteration(UserInst, L);
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRFixup::print(raw_ostream &OS) const {
OS << "UserInst=";
@@ -1783,6 +1799,9 @@ void LSRUse::print(raw_ostream &OS) const {
if (AllFixupsOutsideLoop)
OS << ", all-fixups-outside-loop";
+ if (AllFixupsUnconditional)
+ OS << ", all-fixups-unconditional";
+
if (WidestFixupType)
OS << ", widest fixup type: " << *WidestFixupType;
}
@@ -3607,6 +3626,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
LF.PostIncLoops = TmpPostIncLoops;
LF.Offset = Offset;
LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+ LU.AllFixupsUnconditional &= LF.isUseUnconditional(L);
// Create SCEV as Formula for calculating baseline cost
if (!VisitedLSRUse.count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
@@ -3803,6 +3823,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
LF.OperandValToReplace = U;
LF.Offset = Offset;
LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+ LU.AllFixupsUnconditional &= LF.isUseUnconditional(L);
if (!LU.WidestFixupType ||
SE.getTypeSizeInBits(LU.WidestFixupType) <
SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
@@ -4940,6 +4961,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
LLVM_DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n');
LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
+ LUThatHas->AllFixupsUnconditional &= LU.AllFixupsUnconditional;
// Transfer the fixups of LU to LUThatHas.
for (LSRFixup &Fixup : LU.Fixups) {
diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/prefer-all.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/prefer-all.ll
index db30fd23b0c9d..065a6c8b980f8 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/prefer-all.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/prefer-all.ll
@@ -119,8 +119,6 @@ for.end:
; We can't use postindex addressing on the conditional load of qval and can't
; convert the loop condition to a compare with zero, so we should instead use
; offset addressing.
-; FIXME: Currently we don't notice the load of qval is conditional, and attempt
-; postindex addressing anyway.
define i32 @conditional_load(ptr %p, ptr %q, ptr %n) {
; CHECK-LABEL: define i32 @conditional_load(
; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[N:%.*]]) {
@@ -128,7 +126,6 @@ define i32 @conditional_load(ptr %p, ptr %q, ptr %n) {
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP2:%.*]], %[[FOR_INC:.*]] ], [ [[P]], %[[ENTRY]] ]
-; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], %[[FOR_INC]] ], [ [[Q]], %[[ENTRY]] ]
; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ [[IDX_NEXT:%.*]], %[[FOR_INC]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[RET:%.*]] = phi i32 [ [[RET_NEXT:%.*]], %[[FOR_INC]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[PVAL:%.*]] = load i32, ptr [[LSR_IV1]], align 4
@@ -136,6 +133,8 @@ define i32 @conditional_load(ptr %p, ptr %q, ptr %n) {
; CHECK-NEXT: [[SCEVGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[FOR_INC]], label %[[IF_THEN:.*]]
; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[IDX]], 2
+; CHECK-NEXT: [[LSR_IV:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP0]]
; CHECK-NEXT: [[QVAL:%.*]] = load i32, ptr [[LSR_IV]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[RET]], [[QVAL]]
; CHECK-NEXT: br label %[[FOR_INC]]
@@ -143,7 +142,6 @@ define i32 @conditional_load(ptr %p, ptr %q, ptr %n) {
; CHECK-NEXT: [[RET_NEXT]] = phi i32 [ [[ADD]], %[[IF_THEN]] ], [ [[RET]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[IDX_NEXT]] = add nuw nsw i64 [[IDX]], 1
; CHECK-NEXT: [[NVAL:%.*]] = load volatile i64, ptr [[N]], align 8
-; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV]], i64 4
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[IDX_NEXT]], [[NVAL]]
; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
``````````
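As a side note, the check added above amounts to asking whether the fixup's user instruction is guaranteed to run on every loop iteration. A minimal standalone sketch of that gating logic, assuming `isGuaranteedToExecuteForEveryIteration` is declared in ValueTracking as in current LLVM (the helper name here is hypothetical, for illustration only):

```c++
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instruction.h"

// Hypothetical helper: a memory access can only enable pre/post-index
// addressing if it is not guarded by a branch that may skip it, i.e. it is
// guaranteed to execute on every iteration of the loop.
static bool mayEnableIndexedAddressing(const llvm::Instruction *UserInst,
                                       const llvm::Loop *L) {
  return llvm::isGuaranteedToExecuteForEveryIteration(UserInst, L);
}
```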
</details>
https://github.com/llvm/llvm-project/pull/159573