[llvm-branch-commits] [llvm] [LoopInterchange] Make the instorder profitability check GEP-independent (PR #181991)

Ryotaro Kasuga via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Fri May 15 06:30:54 PDT 2026


https://github.com/kasuga-fj updated https://github.com/llvm/llvm-project/pull/181991

>From f3e056c43d636f436737d3fb3e61a2fae7adf99b Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Tue, 17 Feb 2026 10:56:58 +0000
Subject: [PATCH 1/3] [LoopInterchange] Fix instorder profitability check

---
 .../lib/Transforms/Scalar/LoopInterchange.cpp | 91 ++++++++++---------
 ...erchangeable-outerloop-multiple-indvars.ll |  2 +-
 .../profitability-instorder.ll                | 70 ++++++++------
 3 files changed, 91 insertions(+), 72 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 91e2510c33851..5420ce1245334 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -1556,53 +1556,62 @@ const DenseMap<const Loop *, unsigned> &CacheCostManager::getCostMap() {
   return CostMap;
 }
 
+/// If \S contains an affine addrec for \p L0, store the step recurrence of the
+/// addrec in \p Coeff0. Same for \p L1 and \p Coeff1. This function assumes \p
+/// S is an nested affine addrec, and it will recursively look through the start
+/// value of the addrec to find the coefficients. If the expression is in a
+/// complex form, e.g., (addrec + addrec), then the coefficients may not be
+/// found.
+static void getAddRecCoefficients(ScalarEvolution &SE, const SCEV *S,
+                                  const Loop *L0,
+                                  std::optional<const SCEV *> &Coeff0,
+                                  const Loop *L1,
+                                  std::optional<const SCEV *> &Coeff1) {
+  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
+  if (!AR)
+    return;
+  if (!AR->isAffine()) {
+    LLVM_DEBUG(dbgs() << "Unexpected non-affine addrec\n");
+    return;
+  }
+  if (AR->getLoop() == L0)
+    Coeff0 = AR->getStepRecurrence(SE);
+  if (AR->getLoop() == L1)
+    Coeff1 = AR->getStepRecurrence(SE);
+  getAddRecCoefficients(SE, AR->getStart(), L0, Coeff0, L1, Coeff1);
+}
+
 int LoopInterchangeProfitability::getInstrOrderCost() {
   unsigned GoodOrder, BadOrder;
   BadOrder = GoodOrder = 0;
   for (BasicBlock *BB : InnerLoop->blocks()) {
     for (Instruction &Ins : *BB) {
-      if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
-        bool FoundInnerInduction = false;
-        bool FoundOuterInduction = false;
-        for (Value *Op : GEP->operands()) {
-          // Skip operands that are not SCEV-able.
-          if (!SE->isSCEVable(Op->getType()))
-            continue;
-
-          const SCEV *OperandVal = SE->getSCEV(Op);
-          const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
-          if (!AR)
-            continue;
+      if (!isa<LoadInst, StoreInst>(&Ins))
+        continue;
+      const SCEV *Ptr = SE->getSCEV(getLoadStorePointerOperand(&Ins));
+      std::optional<const SCEV *> OuterCoeff, InnerCoeff;
+      getAddRecCoefficients(*SE, Ptr, OuterLoop, OuterCoeff, InnerLoop,
+                            InnerCoeff);
+      if (!InnerCoeff.has_value() || !OuterCoeff.has_value())
+        continue;
 
-          // If we find the inner induction after an outer induction e.g.
-          // for(int i=0;i<N;i++)
-          //   for(int j=0;j<N;j++)
-          //     A[i][j] = A[i-1][j-1]+k;
-          // then it is a good order.
-          if (AR->getLoop() == InnerLoop) {
-            // We found an InnerLoop induction after OuterLoop induction. It is
-            // a good order.
-            FoundInnerInduction = true;
-            if (FoundOuterInduction) {
-              GoodOrder++;
-              break;
-            }
-          }
-          // If we find the outer induction after an inner induction e.g.
-          // for(int i=0;i<N;i++)
-          //   for(int j=0;j<N;j++)
-          //     A[j][i] = A[j-1][i-1]+k;
-          // then it is a bad order.
-          if (AR->getLoop() == OuterLoop) {
-            // We found an OuterLoop induction after InnerLoop induction. It is
-            // a bad order.
-            FoundOuterInduction = true;
-            if (FoundInnerInduction) {
-              BadOrder++;
-              break;
-            }
-          }
-        }
+      const SCEV *OuterStep = SE->getAbsExpr(*OuterCoeff, /*IsNSW=*/false);
+      const SCEV *InnerStep = SE->getAbsExpr(*InnerCoeff, /*IsNSW=*/false);
+      if (SE->isKnownPredicate(ICmpInst::ICMP_SLT, InnerStep, OuterStep)) {
+        // If we find the inner induction after an outer induction e.g.
+        // for(int i=0;i<N;i++)
+        //   for(int j=0;j<N;j++)
+        //     A[i][j] = A[i-1][j-1]+k;
+        // then it is a good order.
+        GoodOrder++;
+      } else if (SE->isKnownPredicate(ICmpInst::ICMP_SLT, OuterStep,
+                                      InnerStep)) {
+        // If we find the outer induction after an inner induction e.g.
+        // for(int i=0;i<N;i++)
+        //   for(int j=0;j<N;j++)
+        //     A[j][i] = A[j-1][i-1]+k;
+        // then it is a bad order.
+        BadOrder++;
       }
     }
   }
diff --git a/llvm/test/Transforms/LoopInterchange/interchangeable-outerloop-multiple-indvars.ll b/llvm/test/Transforms/LoopInterchange/interchangeable-outerloop-multiple-indvars.ll
index 8cdf999ce45e4..d0e7a52267442 100644
--- a/llvm/test/Transforms/LoopInterchange/interchangeable-outerloop-multiple-indvars.ll
+++ b/llvm/test/Transforms/LoopInterchange/interchangeable-outerloop-multiple-indvars.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
-; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s
+; RUN: opt < %s -passes=loop-interchange -loop-interchange-profitabilities=ignore -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s
 
 @b = constant [200 x [100 x i32]] zeroinitializer, align 4
 @a = constant i32 0, align 4
diff --git a/llvm/test/Transforms/LoopInterchange/profitability-instorder.ll b/llvm/test/Transforms/LoopInterchange/profitability-instorder.ll
index 97f8f90f1159f..6501695f0bd79 100644
--- a/llvm/test/Transforms/LoopInterchange/profitability-instorder.ll
+++ b/llvm/test/Transforms/LoopInterchange/profitability-instorder.ll
@@ -10,24 +10,34 @@
 define void @profitable(ptr %A) {
 ; CHECK-LABEL: define void @profitable(
 ; CHECK-SAME: ptr [[A:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    br label %[[LOOP_I_HEADER:.*]]
-; CHECK:       [[LOOP_I_HEADER]]:
-; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[I_INC:%.*]], %[[LOOP_I_LATCH:.*]] ]
+; CHECK:       [[LOOP_I_HEADER_PREHEADER:.*]]:
 ; CHECK-NEXT:    br label %[[LOOP_J:.*]]
 ; CHECK:       [[LOOP_J]]:
-; CHECK-NEXT:    [[J:%.*]] = phi i64 [ 0, %[[LOOP_I_HEADER]] ], [ [[J_INC:%.*]], %[[LOOP_J]] ]
+; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_INC:%.*]], %[[LOOP_I_LATCH:.*]] ], [ 0, %[[LOOP_I_HEADER_PREHEADER]] ]
+; CHECK-NEXT:    br label %[[LOOP_J_SPLIT1:.*]]
+; CHECK:       [[LOOP_I_HEADER]]:
+; CHECK-NEXT:    br label %[[LOOP_J1:.*]]
+; CHECK:       [[LOOP_J1]]:
+; CHECK-NEXT:    [[J:%.*]] = phi i64 [ [[TMP0:%.*]], %[[LOOP_J_SPLIT:.*]] ], [ 0, %[[LOOP_I_HEADER]] ]
+; CHECK-NEXT:    br label %[[LOOP_I_HEADER_PREHEADER]]
+; CHECK:       [[LOOP_J_SPLIT1]]:
 ; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[J]], 100
 ; CHECK-NEXT:    [[OFFSET:%.*]] = add i64 [[I]], [[MUL]]
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET]]
 ; CHECK-NEXT:    store i8 0, ptr [[GEP]], align 1
-; CHECK-NEXT:    [[J_INC]] = add i64 [[J]], 1
+; CHECK-NEXT:    [[J_INC:%.*]] = add i64 [[J]], 1
 ; CHECK-NEXT:    [[EC_J:%.*]] = icmp eq i64 [[J_INC]], 10
-; CHECK-NEXT:    br i1 [[EC_J]], label %[[LOOP_I_LATCH]], label %[[LOOP_J]]
+; CHECK-NEXT:    br label %[[LOOP_I_LATCH]]
+; CHECK:       [[LOOP_J_SPLIT]]:
+; CHECK-NEXT:    [[TMP0]] = add i64 [[J]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 10
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[EXIT:.*]], label %[[LOOP_J1]]
 ; CHECK:       [[LOOP_I_LATCH]]:
 ; CHECK-NEXT:    [[I_INC]] = add i64 [[I]], 1
 ; CHECK-NEXT:    [[EC_I:%.*]] = icmp eq i64 [[I_INC]], 10
-; CHECK-NEXT:    br i1 [[EC_I]], label %[[EXIT:.*]], label %[[LOOP_I_HEADER]]
+; CHECK-NEXT:    br i1 [[EC_I]], label %[[LOOP_J_SPLIT]], label %[[LOOP_J]]
 ; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    ret void
 ;
@@ -66,25 +76,35 @@ exit:
 define void @profitable_neg_step(ptr %A) {
 ; CHECK-LABEL: define void @profitable_neg_step(
 ; CHECK-SAME: ptr [[A:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    br label %[[LOOP_I_HEADER:.*]]
-; CHECK:       [[LOOP_I_HEADER]]:
-; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[I_INC:%.*]], %[[LOOP_I_LATCH:.*]] ]
+; CHECK:       [[LOOP_I_HEADER_PREHEADER:.*]]:
 ; CHECK-NEXT:    br label %[[LOOP_J:.*]]
 ; CHECK:       [[LOOP_J]]:
-; CHECK-NEXT:    [[J:%.*]] = phi i64 [ 0, %[[LOOP_I_HEADER]] ], [ [[J_INC:%.*]], %[[LOOP_J]] ]
+; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_INC:%.*]], %[[LOOP_I_LATCH:.*]] ], [ 0, %[[LOOP_I_HEADER_PREHEADER]] ]
+; CHECK-NEXT:    br label %[[LOOP_J_SPLIT1:.*]]
+; CHECK:       [[LOOP_I_HEADER]]:
+; CHECK-NEXT:    br label %[[LOOP_J1:.*]]
+; CHECK:       [[LOOP_J1]]:
+; CHECK-NEXT:    [[J:%.*]] = phi i64 [ [[TMP0:%.*]], %[[LOOP_J_SPLIT:.*]] ], [ 0, %[[LOOP_I_HEADER]] ]
+; CHECK-NEXT:    br label %[[LOOP_I_HEADER_PREHEADER]]
+; CHECK:       [[LOOP_J_SPLIT1]]:
 ; CHECK-NEXT:    [[J_REV:%.*]] = sub i64 9, [[J]]
 ; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[J_REV]], 100
 ; CHECK-NEXT:    [[OFFSET:%.*]] = add i64 [[I]], [[MUL]]
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET]]
 ; CHECK-NEXT:    store i8 0, ptr [[GEP]], align 1
-; CHECK-NEXT:    [[J_INC]] = add i64 [[J]], 1
+; CHECK-NEXT:    [[J_INC:%.*]] = add i64 [[J]], 1
 ; CHECK-NEXT:    [[EC_J:%.*]] = icmp eq i64 [[J_INC]], 10
-; CHECK-NEXT:    br i1 [[EC_J]], label %[[LOOP_I_LATCH]], label %[[LOOP_J]]
+; CHECK-NEXT:    br label %[[LOOP_I_LATCH]]
+; CHECK:       [[LOOP_J_SPLIT]]:
+; CHECK-NEXT:    [[TMP0]] = add i64 [[J]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 10
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[EXIT:.*]], label %[[LOOP_J1]]
 ; CHECK:       [[LOOP_I_LATCH]]:
 ; CHECK-NEXT:    [[I_INC]] = add i64 [[I]], 1
 ; CHECK-NEXT:    [[EC_I:%.*]] = icmp eq i64 [[I_INC]], 10
-; CHECK-NEXT:    br i1 [[EC_I]], label %[[EXIT:.*]], label %[[LOOP_I_HEADER]]
+; CHECK-NEXT:    br i1 [[EC_I]], label %[[LOOP_J_SPLIT]], label %[[LOOP_J]]
 ; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    ret void
 ;
@@ -124,34 +144,24 @@ exit:
 define void @unprofitable(ptr %A) {
 ; CHECK-LABEL: define void @unprofitable(
 ; CHECK-SAME: ptr [[A:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    br label %[[LOOP_J_PREHEADER:.*]]
-; CHECK:       [[LOOP_I_HEADER_PREHEADER:.*]]:
+; CHECK-NEXT:  [[LOOP_I_HEADER_PREHEADER:.*]]:
 ; CHECK-NEXT:    br label %[[LOOP_I_HEADER:.*]]
 ; CHECK:       [[LOOP_I_HEADER]]:
-; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_INC:%.*]], %[[LOOP_I_LATCH:.*]] ], [ 0, %[[LOOP_I_HEADER_PREHEADER]] ]
+; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, %[[LOOP_I_HEADER_PREHEADER]] ], [ [[I_INC:%.*]], %[[LOOP_I_LATCH:.*]] ]
 ; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[I]], 100
 ; CHECK-NEXT:    br label %[[LOOP_J_SPLIT1:.*]]
-; CHECK:       [[LOOP_J_PREHEADER]]:
-; CHECK-NEXT:    br label %[[LOOP_J:.*]]
-; CHECK:       [[LOOP_J]]:
-; CHECK-NEXT:    [[J:%.*]] = phi i64 [ [[TMP0:%.*]], %[[LOOP_J_SPLIT:.*]] ], [ 0, %[[LOOP_J_PREHEADER]] ]
-; CHECK-NEXT:    br label %[[LOOP_I_HEADER_PREHEADER]]
 ; CHECK:       [[LOOP_J_SPLIT1]]:
+; CHECK-NEXT:    [[J:%.*]] = phi i64 [ 0, %[[LOOP_I_HEADER]] ], [ [[TMP0:%.*]], %[[LOOP_J_SPLIT1]] ]
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [1 x i8], ptr [[A]], i64 [[J]], i64 [[MUL]]
 ; CHECK-NEXT:    store i8 0, ptr [[GEP]], align 1
-; CHECK-NEXT:    [[J_INC:%.*]] = add i64 [[J]], 1
-; CHECK-NEXT:    [[EC_J:%.*]] = icmp eq i64 [[J_INC]], 10
-; CHECK-NEXT:    br label %[[LOOP_I_LATCH]]
-; CHECK:       [[LOOP_J_SPLIT]]:
 ; CHECK-NEXT:    [[TMP0]] = add i64 [[J]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 10
-; CHECK-NEXT:    br i1 [[TMP1]], label %[[EXIT:.*]], label %[[LOOP_J]]
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[LOOP_I_LATCH]], label %[[LOOP_J_SPLIT1]]
 ; CHECK:       [[LOOP_I_LATCH]]:
 ; CHECK-NEXT:    [[I_INC]] = add i64 [[I]], 1
 ; CHECK-NEXT:    [[EC_I:%.*]] = icmp eq i64 [[I_INC]], 10
-; CHECK-NEXT:    br i1 [[EC_I]], label %[[LOOP_J_SPLIT]], label %[[LOOP_I_HEADER]]
-; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    br i1 [[EC_I]], label %[[LOOP_J_SPLIT:.*]], label %[[LOOP_I_HEADER]]
+; CHECK:       [[LOOP_J_SPLIT]]:
 ; CHECK-NEXT:    ret void
 ;
 entry:

>From e6a6eeba54eb4a3110eb4706dd36e39c90a97d75 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Fri, 20 Feb 2026 23:19:37 +0900
Subject: [PATCH 2/3] address review comments

---
 llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 5420ce1245334..b0ab2c1155792 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -1562,6 +1562,8 @@ const DenseMap<const Loop *, unsigned> &CacheCostManager::getCostMap() {
 /// value of the addrec to find the coefficients. If the expression is in a
 /// complex form, e.g., (addrec + addrec), then the coefficients may not be
 /// found.
+/// TODO: Handle more complex cases. Maybe using SCEVTraversal is a good way to
+/// do that.
 static void getAddRecCoefficients(ScalarEvolution &SE, const SCEV *S,
                                   const Loop *L0,
                                   std::optional<const SCEV *> &Coeff0,
@@ -1574,10 +1576,16 @@ static void getAddRecCoefficients(ScalarEvolution &SE, const SCEV *S,
     LLVM_DEBUG(dbgs() << "Unexpected non-affine addrec\n");
     return;
   }
-  if (AR->getLoop() == L0)
+  if (AR->getLoop() == L0) {
+    assert(!Coeff0.has_value() &&
+           "Found more than one addrec for the same loop");
     Coeff0 = AR->getStepRecurrence(SE);
-  if (AR->getLoop() == L1)
+  }
+  if (AR->getLoop() == L1) {
+    assert(!Coeff1.has_value() &&
+           "Found more than one addrec for the same loop");
     Coeff1 = AR->getStepRecurrence(SE);
+  }
   getAddRecCoefficients(SE, AR->getStart(), L0, Coeff0, L1, Coeff1);
 }
 
@@ -1595,6 +1603,12 @@ int LoopInterchangeProfitability::getInstrOrderCost() {
       if (!InnerCoeff.has_value() || !OuterCoeff.has_value())
         continue;
 
+      // This heuristic assumes that a smaller step recurrence implies that the
+      // induction variable corresponding to the loop is used in the inner
+      // dimension of the array. Placing such a loop in the inner position would
+      // be beneficial in terms of locality. If the array access is of the form
+      // like `A[3*i + 2*j]`, this heuristic may lead to an unprofitable
+      // interchange, but we expect such cases to be rare.
       const SCEV *OuterStep = SE->getAbsExpr(*OuterCoeff, /*IsNSW=*/false);
       const SCEV *InnerStep = SE->getAbsExpr(*InnerCoeff, /*IsNSW=*/false);
       if (SE->isKnownPredicate(ICmpInst::ICMP_SLT, InnerStep, OuterStep)) {

>From 8769836c6e3f971165e6918f0148379118c49a43 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Fri, 20 Feb 2026 23:19:37 +0900
Subject: [PATCH 3/3] update

---
 .../lib/Transforms/Scalar/LoopInterchange.cpp | 58 ++++++++++---------
 .../profitability-instorder.ll                | 56 +++++++++---------
 2 files changed, 58 insertions(+), 56 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index b0ab2c1155792..93d43c76885a7 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -1556,37 +1556,36 @@ const DenseMap<const Loop *, unsigned> &CacheCostManager::getCostMap() {
   return CostMap;
 }
 
-/// If \S contains an affine addrec for \p L0, store the step recurrence of the
-/// addrec in \p Coeff0. Same for \p L1 and \p Coeff1. This function assumes \p
-/// S is an nested affine addrec, and it will recursively look through the start
-/// value of the addrec to find the coefficients. If the expression is in a
-/// complex form, e.g., (addrec + addrec), then the coefficients may not be
-/// found.
+/// If \p S contains an affine addrec for \p L, return its step recurrence. If
+/// \p S is loop invariant with respect to \p L, return nullptr. Otherwise,
+/// return std::nullopt, which indicates we cannot determine the coefficient of
+/// the addrec for \p L in \p S.
 /// TODO: Handle more complex cases. Maybe using SCEVTraversal is a good way to
 /// do that.
-static void getAddRecCoefficients(ScalarEvolution &SE, const SCEV *S,
-                                  const Loop *L0,
-                                  std::optional<const SCEV *> &Coeff0,
-                                  const Loop *L1,
-                                  std::optional<const SCEV *> &Coeff1) {
+std::optional<const SCEV *> getAddRecCoefficient(ScalarEvolution &SE,
+                                                 const SCEV *S, const Loop *L) {
   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
-  if (!AR)
-    return;
+  if (!AR) {
+    if (SE.isLoopInvariant(S, L))
+      return nullptr;
+    return std::nullopt;
+  }
+
   if (!AR->isAffine()) {
     LLVM_DEBUG(dbgs() << "Unexpected non-affine addrec\n");
-    return;
-  }
-  if (AR->getLoop() == L0) {
-    assert(!Coeff0.has_value() &&
-           "Found more than one addrec for the same loop");
-    Coeff0 = AR->getStepRecurrence(SE);
+    return std::nullopt;
   }
-  if (AR->getLoop() == L1) {
-    assert(!Coeff1.has_value() &&
-           "Found more than one addrec for the same loop");
-    Coeff1 = AR->getStepRecurrence(SE);
+
+  std::optional<const SCEV *> Coeff =
+      getAddRecCoefficient(SE, AR->getStart(), L);
+  if (!Coeff.has_value())
+    return std::nullopt;
+
+  if (AR->getLoop() == L) {
+    assert(!*Coeff && "Found more than one addrec for the same loop");
+    Coeff = AR->getStepRecurrence(SE);
   }
-  getAddRecCoefficients(SE, AR->getStart(), L0, Coeff0, L1, Coeff1);
+  return Coeff;
 }
 
 int LoopInterchangeProfitability::getInstrOrderCost() {
@@ -1597,10 +1596,13 @@ int LoopInterchangeProfitability::getInstrOrderCost() {
       if (!isa<LoadInst, StoreInst>(&Ins))
         continue;
       const SCEV *Ptr = SE->getSCEV(getLoadStorePointerOperand(&Ins));
-      std::optional<const SCEV *> OuterCoeff, InnerCoeff;
-      getAddRecCoefficients(*SE, Ptr, OuterLoop, OuterCoeff, InnerLoop,
-                            InnerCoeff);
-      if (!InnerCoeff.has_value() || !OuterCoeff.has_value())
+      std::optional<const SCEV *> OuterCoeff =
+          getAddRecCoefficient(*SE, Ptr, OuterLoop);
+      std::optional<const SCEV *> InnerCoeff =
+          getAddRecCoefficient(*SE, Ptr, InnerLoop);
+
+      if (!OuterCoeff.has_value() || !*OuterCoeff || !InnerCoeff.has_value() ||
+          !*InnerCoeff)
         continue;
 
       // This heuristic assumes that a smaller step recurrence implies that the
diff --git a/llvm/test/Transforms/LoopInterchange/profitability-instorder.ll b/llvm/test/Transforms/LoopInterchange/profitability-instorder.ll
index 6501695f0bd79..36834fe2fd070 100644
--- a/llvm/test/Transforms/LoopInterchange/profitability-instorder.ll
+++ b/llvm/test/Transforms/LoopInterchange/profitability-instorder.ll
@@ -11,16 +11,16 @@ define void @profitable(ptr %A) {
 ; CHECK-LABEL: define void @profitable(
 ; CHECK-SAME: ptr [[A:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    br label %[[LOOP_I_HEADER:.*]]
+; CHECK-NEXT:    br label %[[LOOP_J_PREHEADER:.*]]
 ; CHECK:       [[LOOP_I_HEADER_PREHEADER:.*]]:
-; CHECK-NEXT:    br label %[[LOOP_J:.*]]
-; CHECK:       [[LOOP_J]]:
+; CHECK-NEXT:    br label %[[LOOP_I_HEADER:.*]]
+; CHECK:       [[LOOP_I_HEADER]]:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_INC:%.*]], %[[LOOP_I_LATCH:.*]] ], [ 0, %[[LOOP_I_HEADER_PREHEADER]] ]
 ; CHECK-NEXT:    br label %[[LOOP_J_SPLIT1:.*]]
-; CHECK:       [[LOOP_I_HEADER]]:
-; CHECK-NEXT:    br label %[[LOOP_J1:.*]]
-; CHECK:       [[LOOP_J1]]:
-; CHECK-NEXT:    [[J:%.*]] = phi i64 [ [[TMP0:%.*]], %[[LOOP_J_SPLIT:.*]] ], [ 0, %[[LOOP_I_HEADER]] ]
+; CHECK:       [[LOOP_J_PREHEADER]]:
+; CHECK-NEXT:    br label %[[LOOP_J:.*]]
+; CHECK:       [[LOOP_J]]:
+; CHECK-NEXT:    [[J:%.*]] = phi i64 [ [[TMP0:%.*]], %[[LOOP_J_SPLIT:.*]] ], [ 0, %[[LOOP_J_PREHEADER]] ]
 ; CHECK-NEXT:    br label %[[LOOP_I_HEADER_PREHEADER]]
 ; CHECK:       [[LOOP_J_SPLIT1]]:
 ; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[J]], 100
@@ -33,11 +33,11 @@ define void @profitable(ptr %A) {
 ; CHECK:       [[LOOP_J_SPLIT]]:
 ; CHECK-NEXT:    [[TMP0]] = add i64 [[J]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 10
-; CHECK-NEXT:    br i1 [[TMP1]], label %[[EXIT:.*]], label %[[LOOP_J1]]
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[EXIT:.*]], label %[[LOOP_J]]
 ; CHECK:       [[LOOP_I_LATCH]]:
 ; CHECK-NEXT:    [[I_INC]] = add i64 [[I]], 1
 ; CHECK-NEXT:    [[EC_I:%.*]] = icmp eq i64 [[I_INC]], 10
-; CHECK-NEXT:    br i1 [[EC_I]], label %[[LOOP_J_SPLIT]], label %[[LOOP_J]]
+; CHECK-NEXT:    br i1 [[EC_I]], label %[[LOOP_J_SPLIT]], label %[[LOOP_I_HEADER]]
 ; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    ret void
 ;
@@ -77,16 +77,16 @@ define void @profitable_neg_step(ptr %A) {
 ; CHECK-LABEL: define void @profitable_neg_step(
 ; CHECK-SAME: ptr [[A:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    br label %[[LOOP_I_HEADER:.*]]
+; CHECK-NEXT:    br label %[[LOOP_J_PREHEADER:.*]]
 ; CHECK:       [[LOOP_I_HEADER_PREHEADER:.*]]:
-; CHECK-NEXT:    br label %[[LOOP_J:.*]]
-; CHECK:       [[LOOP_J]]:
+; CHECK-NEXT:    br label %[[LOOP_I_HEADER:.*]]
+; CHECK:       [[LOOP_I_HEADER]]:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_INC:%.*]], %[[LOOP_I_LATCH:.*]] ], [ 0, %[[LOOP_I_HEADER_PREHEADER]] ]
 ; CHECK-NEXT:    br label %[[LOOP_J_SPLIT1:.*]]
-; CHECK:       [[LOOP_I_HEADER]]:
-; CHECK-NEXT:    br label %[[LOOP_J1:.*]]
-; CHECK:       [[LOOP_J1]]:
-; CHECK-NEXT:    [[J:%.*]] = phi i64 [ [[TMP0:%.*]], %[[LOOP_J_SPLIT:.*]] ], [ 0, %[[LOOP_I_HEADER]] ]
+; CHECK:       [[LOOP_J_PREHEADER]]:
+; CHECK-NEXT:    br label %[[LOOP_J:.*]]
+; CHECK:       [[LOOP_J]]:
+; CHECK-NEXT:    [[J:%.*]] = phi i64 [ [[TMP0:%.*]], %[[LOOP_J_SPLIT:.*]] ], [ 0, %[[LOOP_J_PREHEADER]] ]
 ; CHECK-NEXT:    br label %[[LOOP_I_HEADER_PREHEADER]]
 ; CHECK:       [[LOOP_J_SPLIT1]]:
 ; CHECK-NEXT:    [[J_REV:%.*]] = sub i64 9, [[J]]
@@ -100,11 +100,11 @@ define void @profitable_neg_step(ptr %A) {
 ; CHECK:       [[LOOP_J_SPLIT]]:
 ; CHECK-NEXT:    [[TMP0]] = add i64 [[J]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 10
-; CHECK-NEXT:    br i1 [[TMP1]], label %[[EXIT:.*]], label %[[LOOP_J1]]
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[EXIT:.*]], label %[[LOOP_J]]
 ; CHECK:       [[LOOP_I_LATCH]]:
 ; CHECK-NEXT:    [[I_INC]] = add i64 [[I]], 1
 ; CHECK-NEXT:    [[EC_I:%.*]] = icmp eq i64 [[I_INC]], 10
-; CHECK-NEXT:    br i1 [[EC_I]], label %[[LOOP_J_SPLIT]], label %[[LOOP_J]]
+; CHECK-NEXT:    br i1 [[EC_I]], label %[[LOOP_J_SPLIT]], label %[[LOOP_I_HEADER]]
 ; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    ret void
 ;
@@ -144,24 +144,24 @@ exit:
 define void @unprofitable(ptr %A) {
 ; CHECK-LABEL: define void @unprofitable(
 ; CHECK-SAME: ptr [[A:%.*]]) {
-; CHECK-NEXT:  [[LOOP_I_HEADER_PREHEADER:.*]]:
+; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    br label %[[LOOP_I_HEADER:.*]]
 ; CHECK:       [[LOOP_I_HEADER]]:
-; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, %[[LOOP_I_HEADER_PREHEADER]] ], [ [[I_INC:%.*]], %[[LOOP_I_LATCH:.*]] ]
+; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[I_INC:%.*]], %[[LOOP_I_LATCH:.*]] ]
 ; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[I]], 100
-; CHECK-NEXT:    br label %[[LOOP_J_SPLIT1:.*]]
-; CHECK:       [[LOOP_J_SPLIT1]]:
-; CHECK-NEXT:    [[J:%.*]] = phi i64 [ 0, %[[LOOP_I_HEADER]] ], [ [[TMP0:%.*]], %[[LOOP_J_SPLIT1]] ]
+; CHECK-NEXT:    br label %[[LOOP_J:.*]]
+; CHECK:       [[LOOP_J]]:
+; CHECK-NEXT:    [[J:%.*]] = phi i64 [ 0, %[[LOOP_I_HEADER]] ], [ [[J_INC:%.*]], %[[LOOP_J]] ]
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [1 x i8], ptr [[A]], i64 [[J]], i64 [[MUL]]
 ; CHECK-NEXT:    store i8 0, ptr [[GEP]], align 1
-; CHECK-NEXT:    [[TMP0]] = add i64 [[J]], 1
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 10
-; CHECK-NEXT:    br i1 [[TMP1]], label %[[LOOP_I_LATCH]], label %[[LOOP_J_SPLIT1]]
+; CHECK-NEXT:    [[J_INC]] = add i64 [[J]], 1
+; CHECK-NEXT:    [[EC_J:%.*]] = icmp eq i64 [[J_INC]], 10
+; CHECK-NEXT:    br i1 [[EC_J]], label %[[LOOP_I_LATCH]], label %[[LOOP_J]]
 ; CHECK:       [[LOOP_I_LATCH]]:
 ; CHECK-NEXT:    [[I_INC]] = add i64 [[I]], 1
 ; CHECK-NEXT:    [[EC_I:%.*]] = icmp eq i64 [[I_INC]], 10
-; CHECK-NEXT:    br i1 [[EC_I]], label %[[LOOP_J_SPLIT:.*]], label %[[LOOP_I_HEADER]]
-; CHECK:       [[LOOP_J_SPLIT]]:
+; CHECK-NEXT:    br i1 [[EC_I]], label %[[EXIT:.*]], label %[[LOOP_I_HEADER]]
+; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    ret void
 ;
 entry:



More information about the llvm-branch-commits mailing list