[llvm-branch-commits] [llvm] [LoopInterchange] Make the instorder profitability check GEP-independent (PR #181991)
Ryotaro Kasuga via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri May 15 06:30:54 PDT 2026
https://github.com/kasuga-fj updated https://github.com/llvm/llvm-project/pull/181991
>From f3e056c43d636f436737d3fb3e61a2fae7adf99b Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Tue, 17 Feb 2026 10:56:58 +0000
Subject: [PATCH 1/3] [LoopInterchange] Fix instorder profitability check
---
.../lib/Transforms/Scalar/LoopInterchange.cpp | 91 ++++++++++---------
...erchangeable-outerloop-multiple-indvars.ll | 2 +-
.../profitability-instorder.ll | 70 ++++++++------
3 files changed, 91 insertions(+), 72 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 91e2510c33851..5420ce1245334 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -1556,53 +1556,62 @@ const DenseMap<const Loop *, unsigned> &CacheCostManager::getCostMap() {
return CostMap;
}
+/// If \S contains an affine addrec for \p L0, store the step recurrence of the
+/// addrec in \p Coeff0. Same for \p L1 and \p Coeff1. This function assumes \p
+/// S is an nested affine addrec, and it will recursively look through the start
+/// value of the addrec to find the coefficients. If the expression is in a
+/// complex form, e.g., (addrec + addrec), then the coefficients may not be
+/// found.
+static void getAddRecCoefficients(ScalarEvolution &SE, const SCEV *S,
+ const Loop *L0,
+ std::optional<const SCEV *> &Coeff0,
+ const Loop *L1,
+ std::optional<const SCEV *> &Coeff1) {
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
+ if (!AR)
+ return;
+ if (!AR->isAffine()) {
+ LLVM_DEBUG(dbgs() << "Unexpected non-affine addrec\n");
+ return;
+ }
+ if (AR->getLoop() == L0)
+ Coeff0 = AR->getStepRecurrence(SE);
+ if (AR->getLoop() == L1)
+ Coeff1 = AR->getStepRecurrence(SE);
+ getAddRecCoefficients(SE, AR->getStart(), L0, Coeff0, L1, Coeff1);
+}
+
int LoopInterchangeProfitability::getInstrOrderCost() {
unsigned GoodOrder, BadOrder;
BadOrder = GoodOrder = 0;
for (BasicBlock *BB : InnerLoop->blocks()) {
for (Instruction &Ins : *BB) {
- if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
- bool FoundInnerInduction = false;
- bool FoundOuterInduction = false;
- for (Value *Op : GEP->operands()) {
- // Skip operands that are not SCEV-able.
- if (!SE->isSCEVable(Op->getType()))
- continue;
-
- const SCEV *OperandVal = SE->getSCEV(Op);
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
- if (!AR)
- continue;
+ if (!isa<LoadInst, StoreInst>(&Ins))
+ continue;
+ const SCEV *Ptr = SE->getSCEV(getLoadStorePointerOperand(&Ins));
+ std::optional<const SCEV *> OuterCoeff, InnerCoeff;
+ getAddRecCoefficients(*SE, Ptr, OuterLoop, OuterCoeff, InnerLoop,
+ InnerCoeff);
+ if (!InnerCoeff.has_value() || !OuterCoeff.has_value())
+ continue;
- // If we find the inner induction after an outer induction e.g.
- // for(int i=0;i<N;i++)
- // for(int j=0;j<N;j++)
- // A[i][j] = A[i-1][j-1]+k;
- // then it is a good order.
- if (AR->getLoop() == InnerLoop) {
- // We found an InnerLoop induction after OuterLoop induction. It is
- // a good order.
- FoundInnerInduction = true;
- if (FoundOuterInduction) {
- GoodOrder++;
- break;
- }
- }
- // If we find the outer induction after an inner induction e.g.
- // for(int i=0;i<N;i++)
- // for(int j=0;j<N;j++)
- // A[j][i] = A[j-1][i-1]+k;
- // then it is a bad order.
- if (AR->getLoop() == OuterLoop) {
- // We found an OuterLoop induction after InnerLoop induction. It is
- // a bad order.
- FoundOuterInduction = true;
- if (FoundInnerInduction) {
- BadOrder++;
- break;
- }
- }
- }
+ const SCEV *OuterStep = SE->getAbsExpr(*OuterCoeff, /*IsNSW=*/false);
+ const SCEV *InnerStep = SE->getAbsExpr(*InnerCoeff, /*IsNSW=*/false);
+ if (SE->isKnownPredicate(ICmpInst::ICMP_SLT, InnerStep, OuterStep)) {
+ // If the inner loop's step is smaller than the outer loop's, e.g.
+ // for(int i=0;i<N;i++)
+ // for(int j=0;j<N;j++)
+ // A[i][j] = A[i-1][j-1]+k;
+ // then it is a good order.
+ GoodOrder++;
+ } else if (SE->isKnownPredicate(ICmpInst::ICMP_SLT, OuterStep,
+ InnerStep)) {
+ // If the outer loop's step is smaller than the inner loop's, e.g.
+ // for(int i=0;i<N;i++)
+ // for(int j=0;j<N;j++)
+ // A[j][i] = A[j-1][i-1]+k;
+ // then it is a bad order.
+ BadOrder++;
}
}
}
diff --git a/llvm/test/Transforms/LoopInterchange/interchangeable-outerloop-multiple-indvars.ll b/llvm/test/Transforms/LoopInterchange/interchangeable-outerloop-multiple-indvars.ll
index 8cdf999ce45e4..d0e7a52267442 100644
--- a/llvm/test/Transforms/LoopInterchange/interchangeable-outerloop-multiple-indvars.ll
+++ b/llvm/test/Transforms/LoopInterchange/interchangeable-outerloop-multiple-indvars.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
-; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s
+; RUN: opt < %s -passes=loop-interchange -loop-interchange-profitabilities=ignore -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s
@b = constant [200 x [100 x i32]] zeroinitializer, align 4
@a = constant i32 0, align 4
diff --git a/llvm/test/Transforms/LoopInterchange/profitability-instorder.ll b/llvm/test/Transforms/LoopInterchange/profitability-instorder.ll
index 97f8f90f1159f..6501695f0bd79 100644
--- a/llvm/test/Transforms/LoopInterchange/profitability-instorder.ll
+++ b/llvm/test/Transforms/LoopInterchange/profitability-instorder.ll
@@ -10,24 +10,34 @@
define void @profitable(ptr %A) {
; CHECK-LABEL: define void @profitable(
; CHECK-SAME: ptr [[A:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP_I_HEADER:.*]]
-; CHECK: [[LOOP_I_HEADER]]:
-; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[I_INC:%.*]], %[[LOOP_I_LATCH:.*]] ]
+; CHECK: [[LOOP_I_HEADER_PREHEADER:.*]]:
; CHECK-NEXT: br label %[[LOOP_J:.*]]
; CHECK: [[LOOP_J]]:
-; CHECK-NEXT: [[J:%.*]] = phi i64 [ 0, %[[LOOP_I_HEADER]] ], [ [[J_INC:%.*]], %[[LOOP_J]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_INC:%.*]], %[[LOOP_I_LATCH:.*]] ], [ 0, %[[LOOP_I_HEADER_PREHEADER]] ]
+; CHECK-NEXT: br label %[[LOOP_J_SPLIT1:.*]]
+; CHECK: [[LOOP_I_HEADER]]:
+; CHECK-NEXT: br label %[[LOOP_J1:.*]]
+; CHECK: [[LOOP_J1]]:
+; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[TMP0:%.*]], %[[LOOP_J_SPLIT:.*]] ], [ 0, %[[LOOP_I_HEADER]] ]
+; CHECK-NEXT: br label %[[LOOP_I_HEADER_PREHEADER]]
+; CHECK: [[LOOP_J_SPLIT1]]:
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[J]], 100
; CHECK-NEXT: [[OFFSET:%.*]] = add i64 [[I]], [[MUL]]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET]]
; CHECK-NEXT: store i8 0, ptr [[GEP]], align 1
-; CHECK-NEXT: [[J_INC]] = add i64 [[J]], 1
+; CHECK-NEXT: [[J_INC:%.*]] = add i64 [[J]], 1
; CHECK-NEXT: [[EC_J:%.*]] = icmp eq i64 [[J_INC]], 10
-; CHECK-NEXT: br i1 [[EC_J]], label %[[LOOP_I_LATCH]], label %[[LOOP_J]]
+; CHECK-NEXT: br label %[[LOOP_I_LATCH]]
+; CHECK: [[LOOP_J_SPLIT]]:
+; CHECK-NEXT: [[TMP0]] = add i64 [[J]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 10
+; CHECK-NEXT: br i1 [[TMP1]], label %[[EXIT:.*]], label %[[LOOP_J1]]
; CHECK: [[LOOP_I_LATCH]]:
; CHECK-NEXT: [[I_INC]] = add i64 [[I]], 1
; CHECK-NEXT: [[EC_I:%.*]] = icmp eq i64 [[I_INC]], 10
-; CHECK-NEXT: br i1 [[EC_I]], label %[[EXIT:.*]], label %[[LOOP_I_HEADER]]
+; CHECK-NEXT: br i1 [[EC_I]], label %[[LOOP_J_SPLIT]], label %[[LOOP_J]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
@@ -66,25 +76,35 @@ exit:
define void @profitable_neg_step(ptr %A) {
; CHECK-LABEL: define void @profitable_neg_step(
; CHECK-SAME: ptr [[A:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP_I_HEADER:.*]]
-; CHECK: [[LOOP_I_HEADER]]:
-; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[I_INC:%.*]], %[[LOOP_I_LATCH:.*]] ]
+; CHECK: [[LOOP_I_HEADER_PREHEADER:.*]]:
; CHECK-NEXT: br label %[[LOOP_J:.*]]
; CHECK: [[LOOP_J]]:
-; CHECK-NEXT: [[J:%.*]] = phi i64 [ 0, %[[LOOP_I_HEADER]] ], [ [[J_INC:%.*]], %[[LOOP_J]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_INC:%.*]], %[[LOOP_I_LATCH:.*]] ], [ 0, %[[LOOP_I_HEADER_PREHEADER]] ]
+; CHECK-NEXT: br label %[[LOOP_J_SPLIT1:.*]]
+; CHECK: [[LOOP_I_HEADER]]:
+; CHECK-NEXT: br label %[[LOOP_J1:.*]]
+; CHECK: [[LOOP_J1]]:
+; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[TMP0:%.*]], %[[LOOP_J_SPLIT:.*]] ], [ 0, %[[LOOP_I_HEADER]] ]
+; CHECK-NEXT: br label %[[LOOP_I_HEADER_PREHEADER]]
+; CHECK: [[LOOP_J_SPLIT1]]:
; CHECK-NEXT: [[J_REV:%.*]] = sub i64 9, [[J]]
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[J_REV]], 100
; CHECK-NEXT: [[OFFSET:%.*]] = add i64 [[I]], [[MUL]]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET]]
; CHECK-NEXT: store i8 0, ptr [[GEP]], align 1
-; CHECK-NEXT: [[J_INC]] = add i64 [[J]], 1
+; CHECK-NEXT: [[J_INC:%.*]] = add i64 [[J]], 1
; CHECK-NEXT: [[EC_J:%.*]] = icmp eq i64 [[J_INC]], 10
-; CHECK-NEXT: br i1 [[EC_J]], label %[[LOOP_I_LATCH]], label %[[LOOP_J]]
+; CHECK-NEXT: br label %[[LOOP_I_LATCH]]
+; CHECK: [[LOOP_J_SPLIT]]:
+; CHECK-NEXT: [[TMP0]] = add i64 [[J]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 10
+; CHECK-NEXT: br i1 [[TMP1]], label %[[EXIT:.*]], label %[[LOOP_J1]]
; CHECK: [[LOOP_I_LATCH]]:
; CHECK-NEXT: [[I_INC]] = add i64 [[I]], 1
; CHECK-NEXT: [[EC_I:%.*]] = icmp eq i64 [[I_INC]], 10
-; CHECK-NEXT: br i1 [[EC_I]], label %[[EXIT:.*]], label %[[LOOP_I_HEADER]]
+; CHECK-NEXT: br i1 [[EC_I]], label %[[LOOP_J_SPLIT]], label %[[LOOP_J]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
@@ -124,34 +144,24 @@ exit:
define void @unprofitable(ptr %A) {
; CHECK-LABEL: define void @unprofitable(
; CHECK-SAME: ptr [[A:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: br label %[[LOOP_J_PREHEADER:.*]]
-; CHECK: [[LOOP_I_HEADER_PREHEADER:.*]]:
+; CHECK-NEXT: [[LOOP_I_HEADER_PREHEADER:.*]]:
; CHECK-NEXT: br label %[[LOOP_I_HEADER:.*]]
; CHECK: [[LOOP_I_HEADER]]:
-; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_INC:%.*]], %[[LOOP_I_LATCH:.*]] ], [ 0, %[[LOOP_I_HEADER_PREHEADER]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, %[[LOOP_I_HEADER_PREHEADER]] ], [ [[I_INC:%.*]], %[[LOOP_I_LATCH:.*]] ]
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[I]], 100
; CHECK-NEXT: br label %[[LOOP_J_SPLIT1:.*]]
-; CHECK: [[LOOP_J_PREHEADER]]:
-; CHECK-NEXT: br label %[[LOOP_J:.*]]
-; CHECK: [[LOOP_J]]:
-; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[TMP0:%.*]], %[[LOOP_J_SPLIT:.*]] ], [ 0, %[[LOOP_J_PREHEADER]] ]
-; CHECK-NEXT: br label %[[LOOP_I_HEADER_PREHEADER]]
; CHECK: [[LOOP_J_SPLIT1]]:
+; CHECK-NEXT: [[J:%.*]] = phi i64 [ 0, %[[LOOP_I_HEADER]] ], [ [[TMP0:%.*]], %[[LOOP_J_SPLIT1]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [1 x i8], ptr [[A]], i64 [[J]], i64 [[MUL]]
; CHECK-NEXT: store i8 0, ptr [[GEP]], align 1
-; CHECK-NEXT: [[J_INC:%.*]] = add i64 [[J]], 1
-; CHECK-NEXT: [[EC_J:%.*]] = icmp eq i64 [[J_INC]], 10
-; CHECK-NEXT: br label %[[LOOP_I_LATCH]]
-; CHECK: [[LOOP_J_SPLIT]]:
; CHECK-NEXT: [[TMP0]] = add i64 [[J]], 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 10
-; CHECK-NEXT: br i1 [[TMP1]], label %[[EXIT:.*]], label %[[LOOP_J]]
+; CHECK-NEXT: br i1 [[TMP1]], label %[[LOOP_I_LATCH]], label %[[LOOP_J_SPLIT1]]
; CHECK: [[LOOP_I_LATCH]]:
; CHECK-NEXT: [[I_INC]] = add i64 [[I]], 1
; CHECK-NEXT: [[EC_I:%.*]] = icmp eq i64 [[I_INC]], 10
-; CHECK-NEXT: br i1 [[EC_I]], label %[[LOOP_J_SPLIT]], label %[[LOOP_I_HEADER]]
-; CHECK: [[EXIT]]:
+; CHECK-NEXT: br i1 [[EC_I]], label %[[LOOP_J_SPLIT:.*]], label %[[LOOP_I_HEADER]]
+; CHECK: [[LOOP_J_SPLIT]]:
; CHECK-NEXT: ret void
;
entry:
>From e6a6eeba54eb4a3110eb4706dd36e39c90a97d75 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Fri, 20 Feb 2026 23:19:37 +0900
Subject: [PATCH 2/3] address review comments
---
llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 5420ce1245334..b0ab2c1155792 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -1562,6 +1562,8 @@ const DenseMap<const Loop *, unsigned> &CacheCostManager::getCostMap() {
/// value of the addrec to find the coefficients. If the expression is in a
/// complex form, e.g., (addrec + addrec), then the coefficients may not be
/// found.
+/// TODO: Handle more complex cases. Maybe using SCEVTraversal is a good way to
+/// do that.
static void getAddRecCoefficients(ScalarEvolution &SE, const SCEV *S,
const Loop *L0,
std::optional<const SCEV *> &Coeff0,
@@ -1574,10 +1576,16 @@ static void getAddRecCoefficients(ScalarEvolution &SE, const SCEV *S,
LLVM_DEBUG(dbgs() << "Unexpected non-affine addrec\n");
return;
}
- if (AR->getLoop() == L0)
+ if (AR->getLoop() == L0) {
+ assert(!Coeff0.has_value() &&
+ "Found more than one addrec for the same loop");
Coeff0 = AR->getStepRecurrence(SE);
- if (AR->getLoop() == L1)
+ }
+ if (AR->getLoop() == L1) {
+ assert(!Coeff1.has_value() &&
+ "Found more than one addrec for the same loop");
Coeff1 = AR->getStepRecurrence(SE);
+ }
getAddRecCoefficients(SE, AR->getStart(), L0, Coeff0, L1, Coeff1);
}
@@ -1595,6 +1603,12 @@ int LoopInterchangeProfitability::getInstrOrderCost() {
if (!InnerCoeff.has_value() || !OuterCoeff.has_value())
continue;
+ // This heuristic assumes that a smaller step recurrence implies that the
+ // induction variable corresponding to the loop is used in the inner
+ // dimension of the array. Placing such a loop in the inner position would
+ // be beneficial in terms of locality. If the array access has a form
+ // like `A[3*i + 2*j]`, this heuristic may lead to an unprofitable
+ // interchange, but we expect such cases to be rare.
const SCEV *OuterStep = SE->getAbsExpr(*OuterCoeff, /*IsNSW=*/false);
const SCEV *InnerStep = SE->getAbsExpr(*InnerCoeff, /*IsNSW=*/false);
if (SE->isKnownPredicate(ICmpInst::ICMP_SLT, InnerStep, OuterStep)) {
>From 8769836c6e3f971165e6918f0148379118c49a43 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Fri, 20 Feb 2026 23:19:37 +0900
Subject: [PATCH 3/3] update
---
.../lib/Transforms/Scalar/LoopInterchange.cpp | 58 ++++++++++---------
.../profitability-instorder.ll | 56 +++++++++---------
2 files changed, 58 insertions(+), 56 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index b0ab2c1155792..93d43c76885a7 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -1556,37 +1556,36 @@ const DenseMap<const Loop *, unsigned> &CacheCostManager::getCostMap() {
return CostMap;
}
-/// If \S contains an affine addrec for \p L0, store the step recurrence of the
-/// addrec in \p Coeff0. Same for \p L1 and \p Coeff1. This function assumes \p
-/// S is an nested affine addrec, and it will recursively look through the start
-/// value of the addrec to find the coefficients. If the expression is in a
-/// complex form, e.g., (addrec + addrec), then the coefficients may not be
-/// found.
+/// If \p S contains an affine addrec for \p L, return its step recurrence. If
+/// \p S is loop invariant with respect to \p L, return nullptr. Otherwise,
+/// return std::nullopt, which indicates we cannot determine the coefficient of
+/// the addrec for \p L in \p S.
/// TODO: Handle more complex cases. Maybe using SCEVTraversal is a good way to
/// do that.
-static void getAddRecCoefficients(ScalarEvolution &SE, const SCEV *S,
- const Loop *L0,
- std::optional<const SCEV *> &Coeff0,
- const Loop *L1,
- std::optional<const SCEV *> &Coeff1) {
+std::optional<const SCEV *> getAddRecCoefficient(ScalarEvolution &SE,
+ const SCEV *S, const Loop *L) {
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
- if (!AR)
- return;
+ if (!AR) {
+ if (SE.isLoopInvariant(S, L))
+ return nullptr;
+ return std::nullopt;
+ }
+
if (!AR->isAffine()) {
LLVM_DEBUG(dbgs() << "Unexpected non-affine addrec\n");
- return;
- }
- if (AR->getLoop() == L0) {
- assert(!Coeff0.has_value() &&
- "Found more than one addrec for the same loop");
- Coeff0 = AR->getStepRecurrence(SE);
+ return std::nullopt;
}
- if (AR->getLoop() == L1) {
- assert(!Coeff1.has_value() &&
- "Found more than one addrec for the same loop");
- Coeff1 = AR->getStepRecurrence(SE);
+
+ std::optional<const SCEV *> Coeff =
+ getAddRecCoefficient(SE, AR->getStart(), L);
+ if (!Coeff.has_value())
+ return std::nullopt;
+
+ if (AR->getLoop() == L) {
+ assert(!*Coeff && "Found more than one addrec for the same loop");
+ Coeff = AR->getStepRecurrence(SE);
}
- getAddRecCoefficients(SE, AR->getStart(), L0, Coeff0, L1, Coeff1);
+ return Coeff;
}
int LoopInterchangeProfitability::getInstrOrderCost() {
@@ -1597,10 +1596,13 @@ int LoopInterchangeProfitability::getInstrOrderCost() {
if (!isa<LoadInst, StoreInst>(&Ins))
continue;
const SCEV *Ptr = SE->getSCEV(getLoadStorePointerOperand(&Ins));
- std::optional<const SCEV *> OuterCoeff, InnerCoeff;
- getAddRecCoefficients(*SE, Ptr, OuterLoop, OuterCoeff, InnerLoop,
- InnerCoeff);
- if (!InnerCoeff.has_value() || !OuterCoeff.has_value())
+ std::optional<const SCEV *> OuterCoeff =
+ getAddRecCoefficient(*SE, Ptr, OuterLoop);
+ std::optional<const SCEV *> InnerCoeff =
+ getAddRecCoefficient(*SE, Ptr, InnerLoop);
+
+ if (!OuterCoeff.has_value() || !*OuterCoeff || !InnerCoeff.has_value() ||
+ !*InnerCoeff)
continue;
// This heuristic assumes that a smaller step recurrence implies that the
diff --git a/llvm/test/Transforms/LoopInterchange/profitability-instorder.ll b/llvm/test/Transforms/LoopInterchange/profitability-instorder.ll
index 6501695f0bd79..36834fe2fd070 100644
--- a/llvm/test/Transforms/LoopInterchange/profitability-instorder.ll
+++ b/llvm/test/Transforms/LoopInterchange/profitability-instorder.ll
@@ -11,16 +11,16 @@ define void @profitable(ptr %A) {
; CHECK-LABEL: define void @profitable(
; CHECK-SAME: ptr [[A:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: br label %[[LOOP_I_HEADER:.*]]
+; CHECK-NEXT: br label %[[LOOP_J_PREHEADER:.*]]
; CHECK: [[LOOP_I_HEADER_PREHEADER:.*]]:
-; CHECK-NEXT: br label %[[LOOP_J:.*]]
-; CHECK: [[LOOP_J]]:
+; CHECK-NEXT: br label %[[LOOP_I_HEADER:.*]]
+; CHECK: [[LOOP_I_HEADER]]:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_INC:%.*]], %[[LOOP_I_LATCH:.*]] ], [ 0, %[[LOOP_I_HEADER_PREHEADER]] ]
; CHECK-NEXT: br label %[[LOOP_J_SPLIT1:.*]]
-; CHECK: [[LOOP_I_HEADER]]:
-; CHECK-NEXT: br label %[[LOOP_J1:.*]]
-; CHECK: [[LOOP_J1]]:
-; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[TMP0:%.*]], %[[LOOP_J_SPLIT:.*]] ], [ 0, %[[LOOP_I_HEADER]] ]
+; CHECK: [[LOOP_J_PREHEADER]]:
+; CHECK-NEXT: br label %[[LOOP_J:.*]]
+; CHECK: [[LOOP_J]]:
+; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[TMP0:%.*]], %[[LOOP_J_SPLIT:.*]] ], [ 0, %[[LOOP_J_PREHEADER]] ]
; CHECK-NEXT: br label %[[LOOP_I_HEADER_PREHEADER]]
; CHECK: [[LOOP_J_SPLIT1]]:
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[J]], 100
@@ -33,11 +33,11 @@ define void @profitable(ptr %A) {
; CHECK: [[LOOP_J_SPLIT]]:
; CHECK-NEXT: [[TMP0]] = add i64 [[J]], 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 10
-; CHECK-NEXT: br i1 [[TMP1]], label %[[EXIT:.*]], label %[[LOOP_J1]]
+; CHECK-NEXT: br i1 [[TMP1]], label %[[EXIT:.*]], label %[[LOOP_J]]
; CHECK: [[LOOP_I_LATCH]]:
; CHECK-NEXT: [[I_INC]] = add i64 [[I]], 1
; CHECK-NEXT: [[EC_I:%.*]] = icmp eq i64 [[I_INC]], 10
-; CHECK-NEXT: br i1 [[EC_I]], label %[[LOOP_J_SPLIT]], label %[[LOOP_J]]
+; CHECK-NEXT: br i1 [[EC_I]], label %[[LOOP_J_SPLIT]], label %[[LOOP_I_HEADER]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
@@ -77,16 +77,16 @@ define void @profitable_neg_step(ptr %A) {
; CHECK-LABEL: define void @profitable_neg_step(
; CHECK-SAME: ptr [[A:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: br label %[[LOOP_I_HEADER:.*]]
+; CHECK-NEXT: br label %[[LOOP_J_PREHEADER:.*]]
; CHECK: [[LOOP_I_HEADER_PREHEADER:.*]]:
-; CHECK-NEXT: br label %[[LOOP_J:.*]]
-; CHECK: [[LOOP_J]]:
+; CHECK-NEXT: br label %[[LOOP_I_HEADER:.*]]
+; CHECK: [[LOOP_I_HEADER]]:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_INC:%.*]], %[[LOOP_I_LATCH:.*]] ], [ 0, %[[LOOP_I_HEADER_PREHEADER]] ]
; CHECK-NEXT: br label %[[LOOP_J_SPLIT1:.*]]
-; CHECK: [[LOOP_I_HEADER]]:
-; CHECK-NEXT: br label %[[LOOP_J1:.*]]
-; CHECK: [[LOOP_J1]]:
-; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[TMP0:%.*]], %[[LOOP_J_SPLIT:.*]] ], [ 0, %[[LOOP_I_HEADER]] ]
+; CHECK: [[LOOP_J_PREHEADER]]:
+; CHECK-NEXT: br label %[[LOOP_J:.*]]
+; CHECK: [[LOOP_J]]:
+; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[TMP0:%.*]], %[[LOOP_J_SPLIT:.*]] ], [ 0, %[[LOOP_J_PREHEADER]] ]
; CHECK-NEXT: br label %[[LOOP_I_HEADER_PREHEADER]]
; CHECK: [[LOOP_J_SPLIT1]]:
; CHECK-NEXT: [[J_REV:%.*]] = sub i64 9, [[J]]
@@ -100,11 +100,11 @@ define void @profitable_neg_step(ptr %A) {
; CHECK: [[LOOP_J_SPLIT]]:
; CHECK-NEXT: [[TMP0]] = add i64 [[J]], 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 10
-; CHECK-NEXT: br i1 [[TMP1]], label %[[EXIT:.*]], label %[[LOOP_J1]]
+; CHECK-NEXT: br i1 [[TMP1]], label %[[EXIT:.*]], label %[[LOOP_J]]
; CHECK: [[LOOP_I_LATCH]]:
; CHECK-NEXT: [[I_INC]] = add i64 [[I]], 1
; CHECK-NEXT: [[EC_I:%.*]] = icmp eq i64 [[I_INC]], 10
-; CHECK-NEXT: br i1 [[EC_I]], label %[[LOOP_J_SPLIT]], label %[[LOOP_J]]
+; CHECK-NEXT: br i1 [[EC_I]], label %[[LOOP_J_SPLIT]], label %[[LOOP_I_HEADER]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
@@ -144,24 +144,24 @@ exit:
define void @unprofitable(ptr %A) {
; CHECK-LABEL: define void @unprofitable(
; CHECK-SAME: ptr [[A:%.*]]) {
-; CHECK-NEXT: [[LOOP_I_HEADER_PREHEADER:.*]]:
+; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP_I_HEADER:.*]]
; CHECK: [[LOOP_I_HEADER]]:
-; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, %[[LOOP_I_HEADER_PREHEADER]] ], [ [[I_INC:%.*]], %[[LOOP_I_LATCH:.*]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[I_INC:%.*]], %[[LOOP_I_LATCH:.*]] ]
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[I]], 100
-; CHECK-NEXT: br label %[[LOOP_J_SPLIT1:.*]]
-; CHECK: [[LOOP_J_SPLIT1]]:
-; CHECK-NEXT: [[J:%.*]] = phi i64 [ 0, %[[LOOP_I_HEADER]] ], [ [[TMP0:%.*]], %[[LOOP_J_SPLIT1]] ]
+; CHECK-NEXT: br label %[[LOOP_J:.*]]
+; CHECK: [[LOOP_J]]:
+; CHECK-NEXT: [[J:%.*]] = phi i64 [ 0, %[[LOOP_I_HEADER]] ], [ [[J_INC:%.*]], %[[LOOP_J]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [1 x i8], ptr [[A]], i64 [[J]], i64 [[MUL]]
; CHECK-NEXT: store i8 0, ptr [[GEP]], align 1
-; CHECK-NEXT: [[TMP0]] = add i64 [[J]], 1
-; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 10
-; CHECK-NEXT: br i1 [[TMP1]], label %[[LOOP_I_LATCH]], label %[[LOOP_J_SPLIT1]]
+; CHECK-NEXT: [[J_INC]] = add i64 [[J]], 1
+; CHECK-NEXT: [[EC_J:%.*]] = icmp eq i64 [[J_INC]], 10
+; CHECK-NEXT: br i1 [[EC_J]], label %[[LOOP_I_LATCH]], label %[[LOOP_J]]
; CHECK: [[LOOP_I_LATCH]]:
; CHECK-NEXT: [[I_INC]] = add i64 [[I]], 1
; CHECK-NEXT: [[EC_I:%.*]] = icmp eq i64 [[I_INC]], 10
-; CHECK-NEXT: br i1 [[EC_I]], label %[[LOOP_J_SPLIT:.*]], label %[[LOOP_I_HEADER]]
-; CHECK: [[LOOP_J_SPLIT]]:
+; CHECK-NEXT: br i1 [[EC_I]], label %[[EXIT:.*]], label %[[LOOP_I_HEADER]]
+; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
More information about the llvm-branch-commits
mailing list