[llvm] [LoopInterchange] Ignore the cost-model, force interchange if legal (PR #148858)

Sjoerd Meijer via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 18 02:19:50 PDT 2025


https://github.com/sjoerdmeijer updated https://github.com/llvm/llvm-project/pull/148858

>From db9c37e08f5fbb4458ea87f539eae41c275a8a2e Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <smeijer at nvidia.com>
Date: Tue, 15 Jul 2025 04:04:01 -0700
Subject: [PATCH 1/9] [LoopInterchange] Ignore the cost-model, force
 interchange if legal

This is useful for testing purposes, to get more test coverage.
---
 llvm/lib/Transforms/Scalar/LoopInterchange.cpp             | 7 ++++++-
 .../LoopInterchange/profitability-vectorization.ll         | 4 ++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index a5008907b9014..b62c25d52861b 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -66,6 +66,10 @@ static cl::opt<unsigned int> MaxMemInstrCount(
         "in the dependency matrix. Higher value may lead to more interchanges "
         "at the cost of compile-time"));
 
+static cl::opt<bool> ForceLoopInterchange(
+    "loop-interchange-force", cl::init(false), cl::Hidden,
+    cl::desc("Ignore the cost model, and force interchange if it is legal"));
+
 namespace {
 
 using LoopVector = SmallVector<Loop *, 8>;
@@ -599,7 +603,8 @@ struct LoopInterchange {
     }
     LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
     LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
-    if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
+    if (!ForceLoopInterchange &&
+        !LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
                           DependencyMatrix, CCM)) {
       LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
       return false;
diff --git a/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll b/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll
index 16952a66aa78e..59196bbbb9c00 100644
--- a/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll
+++ b/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll
@@ -2,6 +2,10 @@
 ; RUN:     -pass-remarks-output=%t -disable-output
 ; RUN: FileCheck -input-file %t --check-prefix=PROFIT-CACHE %s
 
+; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 \
+; RUN:     -pass-remarks-output=%t -disable-output -loop-interchange-force=true
+; RUN: FileCheck -input-file %t --check-prefix=PROFIT-VEC %s
+
 ; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 \
 ; RUN:     -pass-remarks-output=%t -disable-output -loop-interchange-profitabilities=vectorize,cache,instorder
 ; RUN: FileCheck -input-file %t --check-prefix=PROFIT-VEC %s

>From fe979eaa158f7298a4dfa28d012b72c70c919ddc Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <smeijer at nvidia.com>
Date: Wed, 16 Jul 2025 07:55:55 -0700
Subject: [PATCH 2/9] Add an ignore value to the existing profitability option.

---
 .../lib/Transforms/Scalar/LoopInterchange.cpp | 17 ++++++-
 .../LoopInterchange/force-interchange.ll      | 50 +++++++++++++++++++
 .../profitability-vectorization.ll            |  2 +-
 3 files changed, 67 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopInterchange/force-interchange.ll

diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index b62c25d52861b..4c16570f52ec9 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -82,6 +82,7 @@ enum class RuleTy {
   PerLoopCacheAnalysis,
   PerInstrOrderCost,
   ForVectorization,
+  Ignore
 };
 
 } // end anonymous namespace
@@ -110,7 +111,10 @@ static cl::list<RuleTy> Profitabilities(
                clEnumValN(RuleTy::PerInstrOrderCost, "instorder",
                           "Prioritize the IVs order of each instruction"),
                clEnumValN(RuleTy::ForVectorization, "vectorize",
-                          "Prioritize vectorization")));
+                          "Prioritize vectorization"),
+               clEnumValN(RuleTy::Ignore, "ignore",
+                          "Ignore profitability, force interchange (does not "
+			  "work with other options)")));
 
 #ifndef NDEBUG
 static bool noDuplicateRules(ArrayRef<RuleTy> Rules) {
@@ -1291,6 +1295,12 @@ std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
 bool LoopInterchangeProfitability::isProfitable(
     const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
     unsigned OuterLoopId, CharMatrix &DepMatrix, CacheCostManager &CCM) {
+
+  // Return true if interchange is forced.
+  if (Profitabilities.size() == 1 &&
+      Profitabilities[0] == RuleTy::Ignore)
+    return true;
+
   // isProfitable() is structured to avoid endless loop interchange. If the
   // highest priority rule (isProfitablePerLoopCacheAnalysis by default) could
   // decide the profitability then, profitability check will stop and return the
@@ -1316,6 +1326,11 @@ bool LoopInterchangeProfitability::isProfitable(
       shouldInterchange =
           isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
       break;
+    case RuleTy::Ignore:
+      // TODO? We ignore the force option when it appears in a list, i.e. it
+      // should occur as the only option to be effective, as mentioned in the
+      // help.
+      break;
     }
 
     // If this rule could determine the profitability, don't call subsequent
diff --git a/llvm/test/Transforms/LoopInterchange/force-interchange.ll b/llvm/test/Transforms/LoopInterchange/force-interchange.ll
new file mode 100644
index 0000000000000..14af2ba5f1d3f
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/force-interchange.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -passes=loop-interchange -pass-remarks-output=%t -disable-output -loop-interchange-profitabilities=ignore -S
+; RUN: FileCheck --input-file=%t %s
+
+; There should be no reason to interchange this, unless it is forced.
+;
+;     for (int i = 0; i<N; i++)
+;       for (int j = 0; j<N; j++)
+;         A[i][j] = 42;
+;
+; CHECK:      --- !Analysis
+; CHECK-NEXT: Pass:            loop-interchange
+; CHECK-NEXT: Name:            Dependence
+; CHECK-NEXT: Function:        f
+; CHECK-NEXT: Args:
+; CHECK-NEXT:   - String:          Computed dependence info, invoking the transform.
+; CHECK-NEXT: ...
+; CHECK-NEXT: --- !Passed
+; CHECK-NEXT: Pass:            loop-interchange
+; CHECK-NEXT: Name:            Interchanged
+; CHECK-NEXT: Function:        f
+; CHECK-NEXT: Args:
+; CHECK-NEXT:   - String:          Loop interchanged with enclosing loop.
+; CHECK-NEXT: ...
+
+ at A = dso_local local_unnamed_addr global [1024 x [1024 x i32]] zeroinitializer, align 4
+
+define dso_local void @f() local_unnamed_addr #0 {
+entry:
+  br label %for.cond1.preheader
+
+for.cond1.preheader:
+  %indvars.iv17 = phi i64 [ 0, %entry ], [ %indvars.iv.next18, %for.cond.cleanup3 ]
+  br label %for.body4
+
+for.cond.cleanup:
+  ret void
+
+for.cond.cleanup3:
+  %indvars.iv.next18 = add nuw nsw i64 %indvars.iv17, 1
+  %exitcond20.not = icmp eq i64 %indvars.iv.next18, 1024
+  br i1 %exitcond20.not, label %for.cond.cleanup, label %for.cond1.preheader
+
+for.body4:
+  %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body4 ]
+  %arrayidx6 = getelementptr inbounds nuw [1024 x [1024 x i32]], ptr @A, i64 0, i64 %indvars.iv17, i64 %indvars.iv
+  store i32 42, ptr %arrayidx6, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond.not, label %for.cond.cleanup3, label %for.body4
+}
diff --git a/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll b/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll
index 59196bbbb9c00..d86c3d959e6ac 100644
--- a/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll
+++ b/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll
@@ -3,7 +3,7 @@
 ; RUN: FileCheck -input-file %t --check-prefix=PROFIT-CACHE %s
 
 ; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 \
-; RUN:     -pass-remarks-output=%t -disable-output -loop-interchange-force=true
+; RUN:     -pass-remarks-output=%t -disable-output -loop-interchange-profitabilities=ignore 
 ; RUN: FileCheck -input-file %t --check-prefix=PROFIT-VEC %s
 
 ; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 \

>From 5bb2158865bea3b144d7c0f110a73a51ea29f0a4 Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <smeijer at nvidia.com>
Date: Thu, 17 Jul 2025 06:06:24 -0700
Subject: [PATCH 3/9] Remove the old option, we don't need that anymore, left
 it accidentally there.

---
 llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 4c16570f52ec9..3297272c2904f 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -66,10 +66,6 @@ static cl::opt<unsigned int> MaxMemInstrCount(
         "in the dependency matrix. Higher value may lead to more interchanges "
         "at the cost of compile-time"));
 
-static cl::opt<bool> ForceLoopInterchange(
-    "loop-interchange-force", cl::init(false), cl::Hidden,
-    cl::desc("Ignore the cost model, and force interchange if it is legal"));
-
 namespace {
 
 using LoopVector = SmallVector<Loop *, 8>;
@@ -607,8 +603,7 @@ struct LoopInterchange {
     }
     LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
     LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
-    if (!ForceLoopInterchange &&
-        !LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
+    if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
                           DependencyMatrix, CCM)) {
       LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
       return false;

>From 1e8e6673d1ef0a04e494cdea22e8c44b9aff8b90 Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <smeijer at nvidia.com>
Date: Thu, 17 Jul 2025 06:10:43 -0700
Subject: [PATCH 4/9] Fix formatting.

---
 llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 3297272c2904f..62c33446fb5ff 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -110,7 +110,7 @@ static cl::list<RuleTy> Profitabilities(
                           "Prioritize vectorization"),
                clEnumValN(RuleTy::Ignore, "ignore",
                           "Ignore profitability, force interchange (does not "
-			  "work with other options)")));
+                          "work with other options)")));
 
 #ifndef NDEBUG
 static bool noDuplicateRules(ArrayRef<RuleTy> Rules) {
@@ -1292,8 +1292,7 @@ bool LoopInterchangeProfitability::isProfitable(
     unsigned OuterLoopId, CharMatrix &DepMatrix, CacheCostManager &CCM) {
 
   // Return true if interchange is forced.
-  if (Profitabilities.size() == 1 &&
-      Profitabilities[0] == RuleTy::Ignore)
+  if (Profitabilities.size() == 1 && Profitabilities[0] == RuleTy::Ignore)
     return true;
 
   // isProfitable() is structured to avoid endless loop interchange. If the

>From 6a3b3f03623113e7b646f438fd506ebf26ac512f Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <smeijer at nvidia.com>
Date: Thu, 17 Jul 2025 14:33:13 +0100
Subject: [PATCH 5/9] Update
 llvm/test/Transforms/LoopInterchange/force-interchange.ll

Co-authored-by: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
---
 llvm/test/Transforms/LoopInterchange/force-interchange.ll | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/test/Transforms/LoopInterchange/force-interchange.ll b/llvm/test/Transforms/LoopInterchange/force-interchange.ll
index 14af2ba5f1d3f..7504c96a9e7fc 100644
--- a/llvm/test/Transforms/LoopInterchange/force-interchange.ll
+++ b/llvm/test/Transforms/LoopInterchange/force-interchange.ll
@@ -3,8 +3,8 @@
 
 ; There should be no reason to interchange this, unless it is forced.
 ;
-;     for (int i = 0; i<N; i++)
-;       for (int j = 0; j<N; j++)
+;     for (int i = 0; i<1024; i++)
+;       for (int j = 0; j<1024; j++)
 ;         A[i][j] = 42;
 ;
 ; CHECK:      --- !Analysis

>From 82a3043df9371d09e87b10c699eda696ad203ed3 Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <smeijer at nvidia.com>
Date: Thu, 17 Jul 2025 06:53:30 -0700
Subject: [PATCH 6/9] Addressed review comments

---
 .../test/Transforms/LoopInterchange/force-interchange.ll | 9 +--------
 .../LoopInterchange/profitability-vectorization.ll       | 4 ----
 2 files changed, 1 insertion(+), 12 deletions(-)

diff --git a/llvm/test/Transforms/LoopInterchange/force-interchange.ll b/llvm/test/Transforms/LoopInterchange/force-interchange.ll
index 7504c96a9e7fc..a0f2064516675 100644
--- a/llvm/test/Transforms/LoopInterchange/force-interchange.ll
+++ b/llvm/test/Transforms/LoopInterchange/force-interchange.ll
@@ -7,14 +7,7 @@
 ;       for (int j = 0; j<1024; j++)
 ;         A[i][j] = 42;
 ;
-; CHECK:      --- !Analysis
-; CHECK-NEXT: Pass:            loop-interchange
-; CHECK-NEXT: Name:            Dependence
-; CHECK-NEXT: Function:        f
-; CHECK-NEXT: Args:
-; CHECK-NEXT:   - String:          Computed dependence info, invoking the transform.
-; CHECK-NEXT: ...
-; CHECK-NEXT: --- !Passed
+; CHECK:      --- !Passed
 ; CHECK-NEXT: Pass:            loop-interchange
 ; CHECK-NEXT: Name:            Interchanged
 ; CHECK-NEXT: Function:        f
diff --git a/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll b/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll
index d86c3d959e6ac..16952a66aa78e 100644
--- a/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll
+++ b/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll
@@ -2,10 +2,6 @@
 ; RUN:     -pass-remarks-output=%t -disable-output
 ; RUN: FileCheck -input-file %t --check-prefix=PROFIT-CACHE %s
 
-; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 \
-; RUN:     -pass-remarks-output=%t -disable-output -loop-interchange-profitabilities=ignore 
-; RUN: FileCheck -input-file %t --check-prefix=PROFIT-VEC %s
-
 ; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 \
 ; RUN:     -pass-remarks-output=%t -disable-output -loop-interchange-profitabilities=vectorize,cache,instorder
 ; RUN: FileCheck -input-file %t --check-prefix=PROFIT-VEC %s

>From 58a6efb62f59b44393bd39474aa95a3228f25eb5 Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <smeijer at nvidia.com>
Date: Thu, 17 Jul 2025 08:40:20 -0700
Subject: [PATCH 7/9] Debug message when ignore has no effect.

---
 llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 62c33446fb5ff..c5ff14dd3c8c4 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -1321,9 +1321,9 @@ bool LoopInterchangeProfitability::isProfitable(
           isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
       break;
     case RuleTy::Ignore:
-      // TODO? We ignore the force option when it appears in a list, i.e. it
-      // should occur as the only option to be effective, as mentioned in the
-      // help.
+      LLVM_DEBUG(dbgs() << "Interchange profitability: option 'ignore' has no "
+                           "effect in combination with other options\n";
+                 dbgs() << "To force interchange, only use 'ignore'");
       break;
     }
 

>From 057d32b3e98c8db1dec0974d2420cb83c2dce860 Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <smeijer at nvidia.com>
Date: Fri, 18 Jul 2025 01:50:02 -0700
Subject: [PATCH 8/9] noDuplicateRulesAndIgnore: assert that ignore doesn't
 appear in a list.

---
 llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index c5ff14dd3c8c4..093b97646871e 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -113,11 +113,14 @@ static cl::list<RuleTy> Profitabilities(
                           "work with other options)")));
 
 #ifndef NDEBUG
-static bool noDuplicateRules(ArrayRef<RuleTy> Rules) {
+static bool noDuplicateRulesAndIgnore(ArrayRef<RuleTy> Rules) {
   SmallSet<RuleTy, 4> Set;
-  for (RuleTy Rule : Rules)
+  for (RuleTy Rule : Rules) {
     if (!Set.insert(Rule).second)
       return false;
+    if (Rule == RuleTy::Ignore)
+      return false;
+  }
   return true;
 }
 
@@ -1291,9 +1294,11 @@ bool LoopInterchangeProfitability::isProfitable(
     const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
     unsigned OuterLoopId, CharMatrix &DepMatrix, CacheCostManager &CCM) {
 
-  // Return true if interchange is forced.
+  // Return true if interchange is forced and the cost-model ignored.
   if (Profitabilities.size() == 1 && Profitabilities[0] == RuleTy::Ignore)
     return true;
+  assert(noDuplicateRulesAndIgnore(Profitabilities) &&
+         "Duplicate rules and option 'ignore' are not allowed");
 
   // isProfitable() is structured to avoid endless loop interchange. If the
   // highest priority rule (isProfitablePerLoopCacheAnalysis by default) could
@@ -1303,7 +1308,6 @@ bool LoopInterchangeProfitability::isProfitable(
   // second highest priority rule (isProfitablePerInstrOrderCost by default).
   // Likewise, if it failed to analysis the profitability then only, the last
   // rule (isProfitableForVectorization by default) will decide.
-  assert(noDuplicateRules(Profitabilities) && "Detect duplicate rules");
   std::optional<bool> shouldInterchange;
   for (RuleTy RT : Profitabilities) {
     switch (RT) {
@@ -1321,9 +1325,7 @@ bool LoopInterchangeProfitability::isProfitable(
           isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
       break;
     case RuleTy::Ignore:
-      LLVM_DEBUG(dbgs() << "Interchange profitability: option 'ignore' has no "
-                           "effect in combination with other options\n";
-                 dbgs() << "To force interchange, only use 'ignore'");
+      // Nothing to do, this has no effect.
       break;
     }
 

>From b5bc6d1bb65bb049f854d5a9de6f2f1230d41f94 Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <smeijer at nvidia.com>
Date: Fri, 18 Jul 2025 02:17:55 -0700
Subject: [PATCH 9/9] Renamed test case labels and variables.

---
 .../LoopInterchange/force-interchange.ll      | 34 +++++++++----------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/llvm/test/Transforms/LoopInterchange/force-interchange.ll b/llvm/test/Transforms/LoopInterchange/force-interchange.ll
index a0f2064516675..c33ecdf7d9905 100644
--- a/llvm/test/Transforms/LoopInterchange/force-interchange.ll
+++ b/llvm/test/Transforms/LoopInterchange/force-interchange.ll
@@ -19,25 +19,25 @@
 
 define dso_local void @f() local_unnamed_addr #0 {
 entry:
-  br label %for.cond1.preheader
+  br label %outer.header
 
-for.cond1.preheader:
-  %indvars.iv17 = phi i64 [ 0, %entry ], [ %indvars.iv.next18, %for.cond.cleanup3 ]
-  br label %for.body4
+outer.header:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %inner.header ]
+  br label %inner.body
 
-for.cond.cleanup:
-  ret void
-
-for.cond.cleanup3:
-  %indvars.iv.next18 = add nuw nsw i64 %indvars.iv17, 1
-  %exitcond20.not = icmp eq i64 %indvars.iv.next18, 1024
-  br i1 %exitcond20.not, label %for.cond.cleanup, label %for.cond1.preheader
+inner.header:
+  %i.next = add nuw nsw i64 %i, 1
+  %exitcond20.not = icmp eq i64 %i.next, 1024
+  br i1 %exitcond20.not, label %exit, label %outer.header
 
-for.body4:
-  %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body4 ]
-  %arrayidx6 = getelementptr inbounds nuw [1024 x [1024 x i32]], ptr @A, i64 0, i64 %indvars.iv17, i64 %indvars.iv
+inner.body:
+  %j = phi i64 [ 0, %outer.header ], [ %j.next, %inner.body ]
+  %arrayidx6 = getelementptr inbounds nuw [1024 x [1024 x i32]], ptr @A, i64 0, i64 %i, i64 %j
   store i32 42, ptr %arrayidx6, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond.not = icmp eq i64 %indvars.iv.next, 1024
-  br i1 %exitcond.not, label %for.cond.cleanup3, label %for.body4
+  %j.next = add nuw nsw i64 %j, 1
+  %exitcond.not = icmp eq i64 %j.next, 1024
+  br i1 %exitcond.not, label %inner.header, label %inner.body
+
+exit:
+  ret void
 }



More information about the llvm-commits mailing list