[llvm] [LoopInterchange] Add metadata to control loop-interchange (PR #127474)
Ryotaro Kasuga via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 7 09:48:50 PST 2025
https://github.com/kasuga-fj updated https://github.com/llvm/llvm-project/pull/127474
>From 1b11ebe3fa04e07b6cb6ceeaeb50d44fac7aa983 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Mon, 17 Feb 2025 11:23:46 +0000
Subject: [PATCH 1/2] [LoopInterchange] Add metadata to control
loop-interchange
This patch adds metadata to enable/disable loop-interchange for a
loop nest. This is a prelude to introducing a new pragma directive for
loop-interchange, like the ones other loop optimizations (unroll,
vectorize, distribute, etc.) already have.
---
.../lib/Transforms/Scalar/LoopInterchange.cpp | 76 ++++
.../Transforms/LoopInterchange/metadata.ll | 325 ++++++++++++++++++
2 files changed, 401 insertions(+)
create mode 100644 llvm/test/Transforms/LoopInterchange/metadata.ll
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 967be109a7ba6..97b2d0c494e4c 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -51,6 +51,16 @@ using namespace llvm;
#define DEBUG_TYPE "loop-interchange"
+/// @{
+/// Metadata attribute names
+static const char *const LLVMLoopInterchangeFollowupAll =
+ "llvm.loop.interchange.followup_all";
+static const char *const LLVMLoopInterchangeFollowupOuter =
+ "llvm.loop.interchange.followup_outer";
+static const char *const LLVMLoopInterchangeFollowupInner =
+ "llvm.loop.interchange.followup_inner";
+/// @}
+
STATISTIC(LoopsInterchanged, "Number of loops interchanged");
static cl::opt<int> LoopInterchangeCostThreshold(
@@ -65,6 +75,14 @@ static cl::opt<unsigned int> MaxMemInstrCount(
"in the dependency matrix. Higher value may lead to more interchanges "
"at the cost of compile-time"));
+// Whether to apply by default.
+// TODO: Once this pass is enabled by default, remove this option and use the
+// value of PipelineTuningOptions.
+static cl::opt<bool> OnlyWhenForced(
+ "loop-interchange-only-when-forced", cl::init(false), cl::ReallyHidden,
+ cl::desc(
+ "Apply interchanges only when explicitly specified metadata exists"));
+
namespace {
using LoopVector = SmallVector<Loop *, 8>;
@@ -297,6 +315,16 @@ static bool isComputableLoopNest(ScalarEvolution *SE,
return true;
}
+static std::optional<bool> findMetadata(Loop *L) {
+ auto Value = findStringMetadataForLoop(L, "llvm.loop.interchange.enable");
+ if (!Value)
+ return std::nullopt;
+
+ const MDOperand *Op = *Value;
+ assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
+ return mdconst::extract<ConstantInt>(*Op)->getZExtValue();
+}
+
namespace {
/// LoopInterchangeLegality checks if it is legal to interchange the loop.
@@ -504,6 +532,10 @@ struct LoopInterchange {
CostMap[LoopCosts[i].first] = i;
}
}
+
+ if (OnlyWhenForced)
+ return processEnabledLoop(LoopList, DependencyMatrix, CostMap);
+
// We try to achieve the globally optimal memory access for the loopnest,
// and do interchange based on a bubble-sort fasion. We start from
// the innermost loop, move it outwards to the best possible position
@@ -532,6 +564,8 @@ struct LoopInterchange {
Loop *InnerLoop = LoopList[InnerLoopId];
LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId
<< " and OuterLoopId = " << OuterLoopId << "\n");
+ if (findMetadata(OuterLoop) == false || findMetadata(InnerLoop) == false)
+ return false;
LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n");
@@ -569,6 +603,48 @@ struct LoopInterchange {
return true;
}
+
+ bool processEnabledLoop(SmallVectorImpl<Loop *> &LoopList,
+ std::vector<std::vector<char>> &DependencyMatrix,
+ const DenseMap<const Loop *, unsigned> &CostMap) {
+ bool Changed = false;
+ for (unsigned InnerLoopId = LoopList.size() - 1; InnerLoopId > 0;
+ InnerLoopId--) {
+ unsigned OuterLoopId = InnerLoopId - 1;
+ if (findMetadata(LoopList[OuterLoopId]) != true)
+ continue;
+
+ MDNode *MDOrigLoopID = LoopList[OuterLoopId]->getLoopID();
+ bool Interchanged =
+ processLoop(LoopList[InnerLoopId], LoopList[OuterLoopId], InnerLoopId,
+ OuterLoopId, DependencyMatrix, CostMap);
+
+ // TODO: Consolidate the duplicate code in `processLoopList`.
+ if (Interchanged) {
+ std::swap(LoopList[OuterLoopId], LoopList[InnerLoopId]);
+ // Update the DependencyMatrix
+ interChangeDependencies(DependencyMatrix, InnerLoopId, OuterLoopId);
+
+ LLVM_DEBUG(dbgs() << "Dependency matrix after interchange:\n";
+ printDepMatrix(DependencyMatrix));
+ }
+
+ std::optional<MDNode *> MDOuterLoopID =
+ makeFollowupLoopID(MDOrigLoopID, {LLVMLoopInterchangeFollowupAll,
+ LLVMLoopInterchangeFollowupOuter});
+ if (MDOuterLoopID)
+ LoopList[OuterLoopId]->setLoopID(*MDOuterLoopID);
+
+ std::optional<MDNode *> MDInnerLoopID =
+ makeFollowupLoopID(MDOrigLoopID, {LLVMLoopInterchangeFollowupAll,
+ LLVMLoopInterchangeFollowupInner});
+ if (MDInnerLoopID)
+ LoopList[InnerLoopId]->setLoopID(*MDInnerLoopID);
+
+ Changed |= Interchanged;
+ }
+ return Changed;
+ }
};
} // end anonymous namespace
diff --git a/llvm/test/Transforms/LoopInterchange/metadata.ll b/llvm/test/Transforms/LoopInterchange/metadata.ll
new file mode 100644
index 0000000000000..9838abb905a7e
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/metadata.ll
@@ -0,0 +1,325 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=loop-interchange -loop-interchange-only-when-forced=0 --cache-line-size=64 -S < %s | FileCheck %s --check-prefix=DEFAULT-ON
+; RUN: opt -passes=loop-interchange -loop-interchange-only-when-forced=1 --cache-line-size=64 -S < %s | FileCheck %s --check-prefix=DEFAULT-OFF
+
+; Test if the metadata works correctly. The code is as follows:
+;
+; #define N 2
+; int a[N][N][N][N];
+; int b[N][N][N][N];
+; void f() {
+; for (int i = 0; i < N; i++)
+; for (int j = 0; j < N; j++)
+; #pragma clang loop interchange(enable or disable)
+; for (int k = 0; k < N; k++)
+; for (int l = 0; l < N; l++)
+; a[l][k][j][i] += b[l][k][j][i];
+; }
+;
+; In the functions explicit_on and explicit_off, the values enable and disable
+; are specified in the pragma, respectively. If
+; `loop-interchange-only-when-forced` is set to 0, loop-interchange is
+; performed on the loop nest unless it is explicitly disabled. If the value is
+; set to 1, loop-interchange is performed on the loop nest only when it is
+; explicitly enabled.
+
+ at a = dso_local local_unnamed_addr global [2 x [2 x [2 x [2 x i32]]]] zeroinitializer, align 4
+ at b = dso_local local_unnamed_addr global [2 x [2 x [2 x [2 x i32]]]] zeroinitializer, align 4
+
+define void @explicit_on() {
+; DEFAULT-ON-LABEL: define void @explicit_on() {
+; DEFAULT-ON-NEXT: [[ENTRY:.*:]]
+; DEFAULT-ON-NEXT: br label %[[FOR_BODY12_PREHEADER:.*]]
+; DEFAULT-ON: [[FOR_COND1_PREHEADER_PREHEADER:.*]]:
+; DEFAULT-ON-NEXT: br label %[[FOR_COND1_PREHEADER:.*]]
+; DEFAULT-ON: [[FOR_COND1_PREHEADER]]:
+; DEFAULT-ON-NEXT: [[INDVARS_IV61:%.*]] = phi i64 [ [[INDVARS_IV_NEXT62:%.*]], %[[FOR_COND_CLEANUP3:.*]] ], [ 0, %[[FOR_COND1_PREHEADER_PREHEADER]] ]
+; DEFAULT-ON-NEXT: br label %[[FOR_BODY12_SPLIT1:.*]]
+; DEFAULT-ON: [[FOR_COND5_PREHEADER_PREHEADER:.*]]:
+; DEFAULT-ON-NEXT: br label %[[FOR_COND5_PREHEADER:.*]]
+; DEFAULT-ON: [[FOR_COND_CLEANUP3]]:
+; DEFAULT-ON-NEXT: [[INDVARS_IV_NEXT62]] = add nuw nsw i64 [[INDVARS_IV61]], 1
+; DEFAULT-ON-NEXT: [[EXITCOND64:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT62]], 2
+; DEFAULT-ON-NEXT: br i1 [[EXITCOND64]], label %[[FOR_COND1_PREHEADER]], label %[[FOR_COND_CLEANUP7_SPLIT:.*]]
+; DEFAULT-ON: [[FOR_COND_CLEANUP7:.*]]:
+; DEFAULT-ON-NEXT: [[INDVARS_IV_NEXT58:%.*]] = add nuw nsw i64 [[INDVARS_IV57:%.*]], 1
+; DEFAULT-ON-NEXT: [[EXITCOND60:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT58]], 2
+; DEFAULT-ON-NEXT: br label %[[FOR_COND_CLEANUP3]]
+; DEFAULT-ON: [[FOR_COND_CLEANUP7_SPLIT]]:
+; DEFAULT-ON-NEXT: [[TMP0:%.*]] = add nuw nsw i64 [[INDVARS_IV57]], 1
+; DEFAULT-ON-NEXT: [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 2
+; DEFAULT-ON-NEXT: br i1 [[TMP1]], label %[[FOR_COND5_PREHEADER]], label %[[FOR_COND_CLEANUP11_SPLIT:.*]]
+; DEFAULT-ON: [[FOR_COND_CLEANUP11:.*]]:
+; DEFAULT-ON-NEXT: [[INDVARS_IV_NEXT54:%.*]] = add nuw nsw i64 [[INDVARS_IV53:%.*]], 1
+; DEFAULT-ON-NEXT: [[EXITCOND56:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT54]], 2
+; DEFAULT-ON-NEXT: br label %[[FOR_COND_CLEANUP7]]
+; DEFAULT-ON: [[FOR_COND_CLEANUP11_SPLIT]]:
+; DEFAULT-ON-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[INDVARS_IV53]], 1
+; DEFAULT-ON-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP2]], 2
+; DEFAULT-ON-NEXT: br i1 [[TMP3]], label %[[FOR_COND9_PREHEADER:.*]], label %[[FOR_BODY12_SPLIT:.*]], !llvm.loop [[LOOP0:![0-9]+]]
+; DEFAULT-ON: [[FOR_BODY12:.*]]:
+; DEFAULT-ON-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP6:%.*]], %[[FOR_BODY12_SPLIT]] ], [ 0, %[[FOR_BODY12_PREHEADER]] ]
+; DEFAULT-ON-NEXT: br label %[[FOR_COND9_PREHEADER_PREHEADER:.*]]
+; DEFAULT-ON: [[FOR_BODY12_SPLIT1]]:
+; DEFAULT-ON-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds nuw [2 x [2 x [2 x [2 x i32]]]], ptr @b, i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV53]], i64 [[INDVARS_IV57]], i64 [[INDVARS_IV61]]
+; DEFAULT-ON-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX18]], align 4
+; DEFAULT-ON-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds nuw [2 x [2 x [2 x [2 x i32]]]], ptr @a, i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV53]], i64 [[INDVARS_IV57]], i64 [[INDVARS_IV61]]
+; DEFAULT-ON-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4
+; DEFAULT-ON-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP4]]
+; DEFAULT-ON-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX26]], align 4
+; DEFAULT-ON-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; DEFAULT-ON-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 2
+; DEFAULT-ON-NEXT: br label %[[FOR_COND_CLEANUP11]]
+; DEFAULT-ON: [[FOR_BODY12_SPLIT]]:
+; DEFAULT-ON-NEXT: [[TMP6]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; DEFAULT-ON-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 2
+; DEFAULT-ON-NEXT: br i1 [[TMP7]], label %[[FOR_BODY12]], label %[[FOR_COND_CLEANUP:.*]]
+; DEFAULT-ON: [[FOR_COND9_PREHEADER]]:
+; DEFAULT-ON-NEXT: [[INDVARS_IV53]] = phi i64 [ [[TMP2]], %[[FOR_COND_CLEANUP11_SPLIT]] ], [ 0, %[[FOR_COND9_PREHEADER_PREHEADER]] ]
+; DEFAULT-ON-NEXT: br label %[[FOR_COND5_PREHEADER_PREHEADER]]
+; DEFAULT-ON: [[FOR_BODY12_PREHEADER]]:
+; DEFAULT-ON-NEXT: br label %[[FOR_BODY12]]
+; DEFAULT-ON: [[FOR_COND5_PREHEADER]]:
+; DEFAULT-ON-NEXT: [[INDVARS_IV57]] = phi i64 [ [[TMP0]], %[[FOR_COND_CLEANUP7_SPLIT]] ], [ 0, %[[FOR_COND5_PREHEADER_PREHEADER]] ]
+; DEFAULT-ON-NEXT: br label %[[FOR_COND1_PREHEADER_PREHEADER]]
+; DEFAULT-ON: [[FOR_COND9_PREHEADER_PREHEADER]]:
+; DEFAULT-ON-NEXT: br label %[[FOR_COND9_PREHEADER]]
+; DEFAULT-ON: [[FOR_COND_CLEANUP]]:
+; DEFAULT-ON-NEXT: ret void
+;
+; DEFAULT-OFF-LABEL: define void @explicit_on() {
+; DEFAULT-OFF-NEXT: [[ENTRY:.*]]:
+; DEFAULT-OFF-NEXT: br label %[[FOR_COND1_PREHEADER:.*]]
+; DEFAULT-OFF: [[FOR_COND1_PREHEADER]]:
+; DEFAULT-OFF-NEXT: [[INDVARS_IV61:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT62:%.*]], %[[FOR_COND_CLEANUP3:.*]] ]
+; DEFAULT-OFF-NEXT: br label %[[FOR_COND5_PREHEADER:.*]]
+; DEFAULT-OFF: [[FOR_COND_CLEANUP3]]:
+; DEFAULT-OFF-NEXT: [[INDVARS_IV_NEXT62]] = add nuw nsw i64 [[INDVARS_IV61]], 1
+; DEFAULT-OFF-NEXT: [[EXITCOND64:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT62]], 2
+; DEFAULT-OFF-NEXT: br i1 [[EXITCOND64]], label %[[FOR_COND1_PREHEADER]], label %[[FOR_COND_CLEANUP:.*]]
+; DEFAULT-OFF: [[FOR_COND_CLEANUP7:.*]]:
+; DEFAULT-OFF-NEXT: [[INDVARS_IV_NEXT58:%.*]] = add nuw nsw i64 [[INDVARS_IV57:%.*]], 1
+; DEFAULT-OFF-NEXT: [[EXITCOND60:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT58]], 2
+; DEFAULT-OFF-NEXT: br i1 [[EXITCOND60]], label %[[FOR_COND5_PREHEADER]], label %[[FOR_COND_CLEANUP3]]
+; DEFAULT-OFF: [[FOR_COND_CLEANUP11:.*]]:
+; DEFAULT-OFF-NEXT: [[INDVARS_IV_NEXT54:%.*]] = add nuw nsw i64 [[INDVARS_IV53:%.*]], 1
+; DEFAULT-OFF-NEXT: [[EXITCOND56:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT54]], 2
+; DEFAULT-OFF-NEXT: br i1 [[EXITCOND56]], label %[[FOR_COND9_PREHEADER:.*]], label %[[FOR_BODY12_SPLIT:.*]], !llvm.loop [[LOOP0:![0-9]+]]
+; DEFAULT-OFF: [[FOR_BODY12:.*]]:
+; DEFAULT-OFF-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP2:%.*]], %[[FOR_BODY12_SPLIT]] ], [ 0, %[[FOR_BODY12_PREHEADER:.*]] ]
+; DEFAULT-OFF-NEXT: br label %[[FOR_COND9_PREHEADER_PREHEADER:.*]]
+; DEFAULT-OFF: [[FOR_BODY12_SPLIT1:.*]]:
+; DEFAULT-OFF-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds nuw [2 x [2 x [2 x [2 x i32]]]], ptr @b, i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV53]], i64 [[INDVARS_IV57]], i64 [[INDVARS_IV61]]
+; DEFAULT-OFF-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX18]], align 4
+; DEFAULT-OFF-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds nuw [2 x [2 x [2 x [2 x i32]]]], ptr @a, i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV53]], i64 [[INDVARS_IV57]], i64 [[INDVARS_IV61]]
+; DEFAULT-OFF-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4
+; DEFAULT-OFF-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
+; DEFAULT-OFF-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX26]], align 4
+; DEFAULT-OFF-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; DEFAULT-OFF-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 2
+; DEFAULT-OFF-NEXT: br label %[[FOR_COND_CLEANUP11]]
+; DEFAULT-OFF: [[FOR_BODY12_SPLIT]]:
+; DEFAULT-OFF-NEXT: [[TMP2]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; DEFAULT-OFF-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP2]], 2
+; DEFAULT-OFF-NEXT: br i1 [[TMP3]], label %[[FOR_BODY12]], label %[[FOR_COND_CLEANUP7]]
+; DEFAULT-OFF: [[FOR_COND9_PREHEADER]]:
+; DEFAULT-OFF-NEXT: [[INDVARS_IV53]] = phi i64 [ [[INDVARS_IV_NEXT54]], %[[FOR_COND_CLEANUP11]] ], [ 0, %[[FOR_COND9_PREHEADER_PREHEADER]] ]
+; DEFAULT-OFF-NEXT: br label %[[FOR_BODY12_SPLIT1]]
+; DEFAULT-OFF: [[FOR_BODY12_PREHEADER]]:
+; DEFAULT-OFF-NEXT: br label %[[FOR_BODY12]]
+; DEFAULT-OFF: [[FOR_COND5_PREHEADER]]:
+; DEFAULT-OFF-NEXT: [[INDVARS_IV57]] = phi i64 [ 0, %[[FOR_COND1_PREHEADER]] ], [ [[INDVARS_IV_NEXT58]], %[[FOR_COND_CLEANUP7]] ]
+; DEFAULT-OFF-NEXT: br label %[[FOR_BODY12_PREHEADER]]
+; DEFAULT-OFF: [[FOR_COND9_PREHEADER_PREHEADER]]:
+; DEFAULT-OFF-NEXT: br label %[[FOR_COND9_PREHEADER]]
+; DEFAULT-OFF: [[FOR_COND_CLEANUP]]:
+; DEFAULT-OFF-NEXT: ret void
+;
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader:
+ %indvars.iv61 = phi i64 [ 0, %entry ], [ %indvars.iv.next62, %for.cond.cleanup3 ]
+ br label %for.cond5.preheader
+
+for.cond.cleanup3:
+ %indvars.iv.next62 = add nuw nsw i64 %indvars.iv61, 1
+ %exitcond64 = icmp ne i64 %indvars.iv.next62, 2
+ br i1 %exitcond64, label %for.cond1.preheader, label %for.cond.cleanup
+
+for.cond.cleanup7:
+ %indvars.iv.next58 = add nuw nsw i64 %indvars.iv57, 1
+ %exitcond60 = icmp ne i64 %indvars.iv.next58, 2
+ br i1 %exitcond60, label %for.cond5.preheader, label %for.cond.cleanup3
+
+for.cond.cleanup11:
+ %indvars.iv.next54 = add nuw nsw i64 %indvars.iv53, 1
+ %exitcond56 = icmp ne i64 %indvars.iv.next54, 2
+ br i1 %exitcond56, label %for.cond9.preheader, label %for.cond.cleanup7, !llvm.loop !0
+
+for.body12:
+ %indvars.iv = phi i64 [ 0, %for.cond9.preheader ], [ %indvars.iv.next, %for.body12 ]
+ %arrayidx18 = getelementptr inbounds nuw [2 x [2 x [2 x [2 x i32]]]], ptr @b, i64 0, i64 %indvars.iv, i64 %indvars.iv53, i64 %indvars.iv57, i64 %indvars.iv61
+ %0 = load i32, ptr %arrayidx18, align 4
+ %arrayidx26 = getelementptr inbounds nuw [2 x [2 x [2 x [2 x i32]]]], ptr @a, i64 0, i64 %indvars.iv, i64 %indvars.iv53, i64 %indvars.iv57, i64 %indvars.iv61
+ %1 = load i32, ptr %arrayidx26, align 4
+ %add = add nsw i32 %1, %0
+ store i32 %add, ptr %arrayidx26, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp ne i64 %indvars.iv.next, 2
+ br i1 %exitcond, label %for.body12, label %for.cond.cleanup11
+
+for.cond9.preheader:
+ %indvars.iv53 = phi i64 [ 0, %for.cond5.preheader ], [ %indvars.iv.next54, %for.cond.cleanup11 ]
+ br label %for.body12
+
+for.cond5.preheader:
+ %indvars.iv57 = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next58, %for.cond.cleanup7 ]
+ br label %for.cond9.preheader
+
+for.cond.cleanup:
+ ret void
+}
+
+define void @explicit_off() {
+; DEFAULT-ON-LABEL: define void @explicit_off() {
+; DEFAULT-ON-NEXT: [[ENTRY:.*:]]
+; DEFAULT-ON-NEXT: br label %[[FOR_COND5_PREHEADER_PREHEADER:.*]]
+; DEFAULT-ON: [[FOR_COND1_PREHEADER_PREHEADER:.*]]:
+; DEFAULT-ON-NEXT: br label %[[FOR_COND1_PREHEADER:.*]]
+; DEFAULT-ON: [[FOR_COND1_PREHEADER]]:
+; DEFAULT-ON-NEXT: [[INDVARS_IV61:%.*]] = phi i64 [ [[INDVARS_IV_NEXT62:%.*]], %[[FOR_COND_CLEANUP3:.*]] ], [ 0, %[[FOR_COND1_PREHEADER_PREHEADER]] ]
+; DEFAULT-ON-NEXT: br label %[[FOR_COND9_PREHEADER:.*]]
+; DEFAULT-ON: [[FOR_COND5_PREHEADER_PREHEADER]]:
+; DEFAULT-ON-NEXT: br label %[[FOR_COND5_PREHEADER:.*]]
+; DEFAULT-ON: [[FOR_COND_CLEANUP3]]:
+; DEFAULT-ON-NEXT: [[INDVARS_IV_NEXT62]] = add nuw nsw i64 [[INDVARS_IV61]], 1
+; DEFAULT-ON-NEXT: [[EXITCOND64:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT62]], 2
+; DEFAULT-ON-NEXT: br i1 [[EXITCOND64]], label %[[FOR_COND1_PREHEADER]], label %[[FOR_COND_CLEANUP7:.*]]
+; DEFAULT-ON: [[FOR_COND_CLEANUP7]]:
+; DEFAULT-ON-NEXT: [[INDVARS_IV_NEXT58:%.*]] = add nuw nsw i64 [[INDVARS_IV57:%.*]], 1
+; DEFAULT-ON-NEXT: [[EXITCOND60:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT58]], 2
+; DEFAULT-ON-NEXT: br i1 [[EXITCOND60]], label %[[FOR_COND5_PREHEADER]], label %[[FOR_COND_CLEANUP:.*]]
+; DEFAULT-ON: [[FOR_COND_CLEANUP11:.*]]:
+; DEFAULT-ON-NEXT: [[INDVARS_IV_NEXT54:%.*]] = add nuw nsw i64 [[INDVARS_IV53:%.*]], 1
+; DEFAULT-ON-NEXT: [[EXITCOND56:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT54]], 2
+; DEFAULT-ON-NEXT: br i1 [[EXITCOND56]], label %[[FOR_COND9_PREHEADER]], label %[[FOR_COND_CLEANUP3]], !llvm.loop [[LOOP2:![0-9]+]]
+; DEFAULT-ON: [[FOR_BODY12:.*]]:
+; DEFAULT-ON-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_COND9_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY12]] ]
+; DEFAULT-ON-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds nuw [2 x [2 x [2 x [2 x i32]]]], ptr @b, i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV53]], i64 [[INDVARS_IV57]], i64 [[INDVARS_IV61]]
+; DEFAULT-ON-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX18]], align 4
+; DEFAULT-ON-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds nuw [2 x [2 x [2 x [2 x i32]]]], ptr @a, i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV53]], i64 [[INDVARS_IV57]], i64 [[INDVARS_IV61]]
+; DEFAULT-ON-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4
+; DEFAULT-ON-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
+; DEFAULT-ON-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX26]], align 4
+; DEFAULT-ON-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; DEFAULT-ON-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 2
+; DEFAULT-ON-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY12]], label %[[FOR_COND_CLEANUP11]]
+; DEFAULT-ON: [[FOR_COND9_PREHEADER]]:
+; DEFAULT-ON-NEXT: [[INDVARS_IV53]] = phi i64 [ 0, %[[FOR_COND1_PREHEADER]] ], [ [[INDVARS_IV_NEXT54]], %[[FOR_COND_CLEANUP11]] ]
+; DEFAULT-ON-NEXT: br label %[[FOR_BODY12]]
+; DEFAULT-ON: [[FOR_COND5_PREHEADER]]:
+; DEFAULT-ON-NEXT: [[INDVARS_IV57]] = phi i64 [ [[INDVARS_IV_NEXT58]], %[[FOR_COND_CLEANUP7]] ], [ 0, %[[FOR_COND5_PREHEADER_PREHEADER]] ]
+; DEFAULT-ON-NEXT: br label %[[FOR_COND1_PREHEADER_PREHEADER]]
+; DEFAULT-ON: [[FOR_COND_CLEANUP]]:
+; DEFAULT-ON-NEXT: ret void
+;
+; DEFAULT-OFF-LABEL: define void @explicit_off() {
+; DEFAULT-OFF-NEXT: [[ENTRY:.*]]:
+; DEFAULT-OFF-NEXT: br label %[[FOR_COND1_PREHEADER:.*]]
+; DEFAULT-OFF: [[FOR_COND1_PREHEADER]]:
+; DEFAULT-OFF-NEXT: [[INDVARS_IV61:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT62:%.*]], %[[FOR_COND_CLEANUP3:.*]] ]
+; DEFAULT-OFF-NEXT: br label %[[FOR_COND5_PREHEADER:.*]]
+; DEFAULT-OFF: [[FOR_COND_CLEANUP3]]:
+; DEFAULT-OFF-NEXT: [[INDVARS_IV_NEXT62]] = add nuw nsw i64 [[INDVARS_IV61]], 1
+; DEFAULT-OFF-NEXT: [[EXITCOND64:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT62]], 2
+; DEFAULT-OFF-NEXT: br i1 [[EXITCOND64]], label %[[FOR_COND1_PREHEADER]], label %[[FOR_COND_CLEANUP:.*]]
+; DEFAULT-OFF: [[FOR_COND_CLEANUP7:.*]]:
+; DEFAULT-OFF-NEXT: [[INDVARS_IV_NEXT58:%.*]] = add nuw nsw i64 [[INDVARS_IV57:%.*]], 1
+; DEFAULT-OFF-NEXT: [[EXITCOND60:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT58]], 2
+; DEFAULT-OFF-NEXT: br i1 [[EXITCOND60]], label %[[FOR_COND5_PREHEADER]], label %[[FOR_COND_CLEANUP3]]
+; DEFAULT-OFF: [[FOR_COND_CLEANUP11:.*]]:
+; DEFAULT-OFF-NEXT: [[INDVARS_IV_NEXT54:%.*]] = add nuw nsw i64 [[INDVARS_IV53:%.*]], 1
+; DEFAULT-OFF-NEXT: [[EXITCOND56:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT54]], 2
+; DEFAULT-OFF-NEXT: br i1 [[EXITCOND56]], label %[[FOR_COND9_PREHEADER:.*]], label %[[FOR_COND_CLEANUP7]], !llvm.loop [[LOOP2:![0-9]+]]
+; DEFAULT-OFF: [[FOR_BODY12:.*]]:
+; DEFAULT-OFF-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_COND9_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY12]] ]
+; DEFAULT-OFF-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds nuw [2 x [2 x [2 x [2 x i32]]]], ptr @b, i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV53]], i64 [[INDVARS_IV57]], i64 [[INDVARS_IV61]]
+; DEFAULT-OFF-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX18]], align 4
+; DEFAULT-OFF-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds nuw [2 x [2 x [2 x [2 x i32]]]], ptr @a, i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV53]], i64 [[INDVARS_IV57]], i64 [[INDVARS_IV61]]
+; DEFAULT-OFF-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4
+; DEFAULT-OFF-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
+; DEFAULT-OFF-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX26]], align 4
+; DEFAULT-OFF-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; DEFAULT-OFF-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 2
+; DEFAULT-OFF-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY12]], label %[[FOR_COND_CLEANUP11]]
+; DEFAULT-OFF: [[FOR_COND9_PREHEADER]]:
+; DEFAULT-OFF-NEXT: [[INDVARS_IV53]] = phi i64 [ 0, %[[FOR_COND5_PREHEADER]] ], [ [[INDVARS_IV_NEXT54]], %[[FOR_COND_CLEANUP11]] ]
+; DEFAULT-OFF-NEXT: br label %[[FOR_BODY12]]
+; DEFAULT-OFF: [[FOR_COND5_PREHEADER]]:
+; DEFAULT-OFF-NEXT: [[INDVARS_IV57]] = phi i64 [ 0, %[[FOR_COND1_PREHEADER]] ], [ [[INDVARS_IV_NEXT58]], %[[FOR_COND_CLEANUP7]] ]
+; DEFAULT-OFF-NEXT: br label %[[FOR_COND9_PREHEADER]]
+; DEFAULT-OFF: [[FOR_COND_CLEANUP]]:
+; DEFAULT-OFF-NEXT: ret void
+;
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader:
+ %indvars.iv61 = phi i64 [ 0, %entry ], [ %indvars.iv.next62, %for.cond.cleanup3 ]
+ br label %for.cond5.preheader
+
+for.cond.cleanup3:
+ %indvars.iv.next62 = add nuw nsw i64 %indvars.iv61, 1
+ %exitcond64 = icmp ne i64 %indvars.iv.next62, 2
+ br i1 %exitcond64, label %for.cond1.preheader, label %for.cond.cleanup
+
+for.cond.cleanup7:
+ %indvars.iv.next58 = add nuw nsw i64 %indvars.iv57, 1
+ %exitcond60 = icmp ne i64 %indvars.iv.next58, 2
+ br i1 %exitcond60, label %for.cond5.preheader, label %for.cond.cleanup3
+
+for.cond.cleanup11:
+ %indvars.iv.next54 = add nuw nsw i64 %indvars.iv53, 1
+ %exitcond56 = icmp ne i64 %indvars.iv.next54, 2
+ br i1 %exitcond56, label %for.cond9.preheader, label %for.cond.cleanup7, !llvm.loop !2
+
+for.body12:
+ %indvars.iv = phi i64 [ 0, %for.cond9.preheader ], [ %indvars.iv.next, %for.body12 ]
+ %arrayidx18 = getelementptr inbounds nuw [2 x [2 x [2 x [2 x i32]]]], ptr @b, i64 0, i64 %indvars.iv, i64 %indvars.iv53, i64 %indvars.iv57, i64 %indvars.iv61
+ %0 = load i32, ptr %arrayidx18, align 4
+ %arrayidx26 = getelementptr inbounds nuw [2 x [2 x [2 x [2 x i32]]]], ptr @a, i64 0, i64 %indvars.iv, i64 %indvars.iv53, i64 %indvars.iv57, i64 %indvars.iv61
+ %1 = load i32, ptr %arrayidx26, align 4
+ %add = add nsw i32 %1, %0
+ store i32 %add, ptr %arrayidx26, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp ne i64 %indvars.iv.next, 2
+ br i1 %exitcond, label %for.body12, label %for.cond.cleanup11
+
+for.cond9.preheader:
+ %indvars.iv53 = phi i64 [ 0, %for.cond5.preheader ], [ %indvars.iv.next54, %for.cond.cleanup11 ]
+ br label %for.body12
+
+for.cond5.preheader:
+ %indvars.iv57 = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next58, %for.cond.cleanup7 ]
+ br label %for.cond9.preheader
+
+for.cond.cleanup:
+ ret void
+}
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.interchange.enable", i1 true}
+!2 = distinct !{!2, !3}
+!3 = !{!"llvm.loop.interchange.enable", i1 false}
+;.
+; DEFAULT-ON: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
+; DEFAULT-ON: [[META1]] = !{!"llvm.loop.interchange.enable", i1 true}
+; DEFAULT-ON: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]}
+; DEFAULT-ON: [[META3]] = !{!"llvm.loop.interchange.enable", i1 false}
+;.
+; DEFAULT-OFF: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
+; DEFAULT-OFF: [[META1]] = !{!"llvm.loop.interchange.enable", i1 true}
+; DEFAULT-OFF: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]}
+; DEFAULT-OFF: [[META3]] = !{!"llvm.loop.interchange.enable", i1 false}
+;.
>From 2418ad8fbc3103a5263f08002f434ef102970bab Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Mon, 3 Mar 2025 11:47:30 +0000
Subject: [PATCH 2/2] Fix metadata to express the application order explicitly.
---
.../lib/Transforms/Scalar/LoopInterchange.cpp | 166 +++++--
.../LoopInterchange/metadata-disable.ll | 109 +++++
.../LoopInterchange/metadata-interruption.ll | 94 ++++
.../Transforms/LoopInterchange/metadata.ll | 404 ++++--------------
4 files changed, 434 insertions(+), 339 deletions(-)
create mode 100644 llvm/test/Transforms/LoopInterchange/metadata-disable.ll
create mode 100644 llvm/test/Transforms/LoopInterchange/metadata-interruption.ll
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 97b2d0c494e4c..baeeb3cb598ec 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -55,6 +55,8 @@ using namespace llvm;
/// Metadata attribute names
static const char *const LLVMLoopInterchangeFollowupAll =
"llvm.loop.interchange.followup_all";
+static const char *const LLVMLoopInterchangeFollowupNextOuter =
+ "llvm.loop.interchange.followup_next_outer";
static const char *const LLVMLoopInterchangeFollowupOuter =
"llvm.loop.interchange.followup_outer";
static const char *const LLVMLoopInterchangeFollowupInner =
@@ -533,6 +535,8 @@ struct LoopInterchange {
}
}
+ // If OnlyWhenForced is true, only process loops for which interchange is
+ // explicitly enabled.
if (OnlyWhenForced)
return processEnabledLoop(LoopList, DependencyMatrix, CostMap);
@@ -564,8 +568,10 @@ struct LoopInterchange {
Loop *InnerLoop = LoopList[InnerLoopId];
LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId
<< " and OuterLoopId = " << OuterLoopId << "\n");
- if (findMetadata(OuterLoop) == false || findMetadata(InnerLoop) == false)
+ if (findMetadata(OuterLoop) == false || findMetadata(InnerLoop) == false) {
+ LLVM_DEBUG(dbgs() << "Not interchanging loops. It is disabled.\n");
return false;
+ }
LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n");
@@ -608,41 +614,145 @@ struct LoopInterchange {
std::vector<std::vector<char>> &DependencyMatrix,
const DenseMap<const Loop *, unsigned> &CostMap) {
bool Changed = false;
- for (unsigned InnerLoopId = LoopList.size() - 1; InnerLoopId > 0;
- InnerLoopId--) {
- unsigned OuterLoopId = InnerLoopId - 1;
- if (findMetadata(LoopList[OuterLoopId]) != true)
- continue;
- MDNode *MDOrigLoopID = LoopList[OuterLoopId]->getLoopID();
- bool Interchanged =
- processLoop(LoopList[InnerLoopId], LoopList[OuterLoopId], InnerLoopId,
- OuterLoopId, DependencyMatrix, CostMap);
-
- // TODO: Consolidate the duplicate code in `processLoopList`.
- if (Interchanged) {
- std::swap(LoopList[OuterLoopId], LoopList[InnerLoopId]);
- // Update the DependencyMatrix
- interChangeDependencies(DependencyMatrix, InnerLoopId, OuterLoopId);
+ // Manage the index so that LoopList[Loop2Index[L]] == L for each loop L.
+ DenseMap<Loop *, unsigned> Loop2Index;
+ for (unsigned I = 0; I != LoopList.size(); I++)
+ Loop2Index[LoopList[I]] = I;
+
+ // Hold outer loops to be exchanged (i.e., loops for which
+ // "llvm.loop.interchange.enable" is true), in the current nest order.
+ SmallVector<Loop *, 4> Worklist;
+
+ // Helper function to try to add a new loop into the Worklist. Return false
+ // if there is a duplicate in the loop to be interchanged.
+ auto AddLoopIfEnabled = [&](Loop *L) {
+ if (findMetadata(L) == true) {
+ if (!Worklist.empty()) {
+ // Because the loops are sorted in the order of the current nest, it
+ // is sufficient to compare with the last element.
+ unsigned InnerLoopId = Loop2Index[Worklist.back()] + 1;
+ unsigned OuterLoopId = Loop2Index[L];
+ if (OuterLoopId <= InnerLoopId) {
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "AmbiguousOrder",
+ L->getStartLoc(), L->getHeader())
+ << "The loops to be interchanged are overlapping.";
+ });
+ return false;
+ }
+ }
+ Worklist.push_back(L);
+ }
+ return true;
+ };
- LLVM_DEBUG(dbgs() << "Dependency matrix after interchange:\n";
- printDepMatrix(DependencyMatrix));
+ // Initialize Worklist. To process the loops in inner-loop-first order, add
+ // them to the worklist in the outer-loop-first order.
+ for (unsigned I = 0; I != LoopList.size(); I++)
+ if (!AddLoopIfEnabled(LoopList[I]))
+ return Changed;
+
+ // Set an upper bound of the number of transformations to avoid infinite
+ // loop. There is no deep meaning behind the current value (square of the
+ // size of LoopList).
+ // TODO: Is this really necessary?
+ const unsigned MaxAttemptsCount = LoopList.size() * LoopList.size();
+ unsigned Attempts = 0;
+
+ // Process the loops. An exchange is applied to two loops, but a metadata
+ // replacement can be applied to three loops: the two loops plus the next
+ // outer loop, if it exists. This is because it's necessary to express the
+ // information about the order of the application of interchanges in cases
+ // where the target loops to be exchanged are overlapping, e.g.,
+ //
+ // #pragma clang loop interchange(enable)
+ // for(int i=0;i<N;i++)
+ // #pragma clang loop interchange(enable)
+ // for (int j=0;j<N;j++)
+ // for (int k=0;k<N;k++)
+ // ...
+ //
+ // In this case we will exchange the innermost two loops first. Then the
+ // follow-up metadata that enables interchange is attached to the outermost
+ // loop, and that loop is enqueued as the next candidate to be processed.
+ while (!Worklist.empty() && Attempts < MaxAttemptsCount) {
+ Loop *TargetLoop = Worklist.pop_back_val();
+ assert(findMetadata(TargetLoop) == true &&
+ "Some metadata was unexpectedlly removed");
+ unsigned OuterLoopId = Loop2Index[TargetLoop];
+ unsigned InnerLoopId = OuterLoopId + 1;
+ if (InnerLoopId >= LoopList.size()) {
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "InnermostLoop",
+ TargetLoop->getStartLoc(),
+ TargetLoop->getHeader())
+ << "The metadata is invalid with an innermost loop.";
+ });
+ break;
+ }
+ MDNode *LoopID = TargetLoop->getLoopID();
+ bool Interchanged = processLoop(LoopList, InnerLoopId, OuterLoopId,
+ DependencyMatrix, CostMap);
+ if (!Interchanged) {
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NotInterchanged",
+ TargetLoop->getStartLoc(),
+ TargetLoop->getHeader())
+ << "Failed to perform explicitly specified loop interchange.";
+ });
+ break;
}
- std::optional<MDNode *> MDOuterLoopID =
- makeFollowupLoopID(MDOrigLoopID, {LLVMLoopInterchangeFollowupAll,
- LLVMLoopInterchangeFollowupOuter});
- if (MDOuterLoopID)
- LoopList[OuterLoopId]->setLoopID(*MDOuterLoopID);
+ // The next outer loop, or nullptr if TargetLoop is the outermost one.
+ Loop *NextOuterLoop = nullptr;
+ if (0 < OuterLoopId)
+ NextOuterLoop = LoopList[OuterLoopId - 1];
+ Loop *OuterLoop = LoopList[OuterLoopId];
+ Loop *InnerLoop = LoopList[InnerLoopId];
+ Attempts++;
+ Changed = true;
+ Loop2Index[OuterLoop] = OuterLoopId;
+ Loop2Index[InnerLoop] = InnerLoopId;
+ // Update the metadata.
+ std::optional<MDNode *> MDNextOuterLoopID =
+ makeFollowupLoopID(LoopID, {LLVMLoopInterchangeFollowupAll,
+ LLVMLoopInterchangeFollowupNextOuter});
+ std::optional<MDNode *> MDOuterLoopID =
+ makeFollowupLoopID(LoopID, {LLVMLoopInterchangeFollowupAll,
+ LLVMLoopInterchangeFollowupOuter});
std::optional<MDNode *> MDInnerLoopID =
- makeFollowupLoopID(MDOrigLoopID, {LLVMLoopInterchangeFollowupAll,
- LLVMLoopInterchangeFollowupInner});
+ makeFollowupLoopID(LoopID, {LLVMLoopInterchangeFollowupAll,
+ LLVMLoopInterchangeFollowupInner});
+ if (MDNextOuterLoopID) {
+ if (NextOuterLoop) {
+ NextOuterLoop->setLoopID(*MDNextOuterLoopID);
+ } else {
+ LLVM_DEBUG(dbgs()
+ << "New metadata for the next outer loop is ignored.\n");
+ }
+ }
+ if (MDOuterLoopID)
+ OuterLoop->setLoopID(*MDOuterLoopID);
if (MDInnerLoopID)
- LoopList[InnerLoopId]->setLoopID(*MDInnerLoopID);
-
- Changed |= Interchanged;
+ InnerLoop->setLoopID(*MDInnerLoopID);
+
+ // Add new elements, paying attention to the order.
+ bool Valid = true;
+ if (NextOuterLoop)
+ Valid &= AddLoopIfEnabled(NextOuterLoop);
+ Valid &= AddLoopIfEnabled(OuterLoop);
+ Valid &= AddLoopIfEnabled(InnerLoop);
+ if (!Valid)
+ break;
}
+
+ LLVM_DEBUG({
+ if (!Worklist.empty())
+ dbgs() << "Some metadata was ignored because the maximum number of "
+ "attempts was reached.\n";
+ });
return Changed;
}
};
diff --git a/llvm/test/Transforms/LoopInterchange/metadata-disable.ll b/llvm/test/Transforms/LoopInterchange/metadata-disable.ll
new file mode 100644
index 0000000000000..af7af8892cb35
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/metadata-disable.ll
@@ -0,0 +1,109 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=loop-interchange -loop-interchange-only-when-forced=0 --cache-line-size=64 -S < %s | FileCheck %s
+
+; Check that the interchange is not applied to the loop that is disabled by
+; metadata. The original code is as below:
+;
+; for (int i=0; i<128; i++)
+; for (int j=0; j<128; j++)
+; #pragma clang loop interchange(disable)
+; for (int k=0; k<128; k++)
+; for (int l=0; l<128; l++)
+; a[l][k][j][i]++;
+;
+; Since interchanges are not applied to the k-loop, the pair (i, j) is the
+; only candidate for exchange.
+
+@a = dso_local local_unnamed_addr global [128 x [128 x [128 x [128 x i32]]]] zeroinitializer, align 4
+
+define void @f() {
+; CHECK-LABEL: define void @f() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[FOR_J_HEADER_PREHEADER:.*]]
+; CHECK: [[FOR_I_HEADER_PREHEADER:.*]]:
+; CHECK-NEXT: br label %[[FOR_I_HEADER:.*]]
+; CHECK: [[FOR_I_HEADER]]:
+; CHECK-NEXT: [[IV_I:%.*]] = phi i64 [ [[IV_I_NEXT:%.*]], %[[FOR_I_CLEANUP:.*]] ], [ 0, %[[FOR_I_HEADER_PREHEADER]] ]
+; CHECK-NEXT: br label %[[FOR_K_HEADER:.*]]
+; CHECK: [[FOR_J_HEADER_PREHEADER]]:
+; CHECK-NEXT: br label %[[FOR_J_HEADER:.*]]
+; CHECK: [[FOR_J_HEADER]]:
+; CHECK-NEXT: [[IV_J:%.*]] = phi i64 [ [[IV_J_NEXT:%.*]], %[[FOR_J_CLEANUP:.*]] ], [ 0, %[[FOR_J_HEADER_PREHEADER]] ]
+; CHECK-NEXT: br label %[[FOR_I_HEADER_PREHEADER]]
+; CHECK: [[FOR_K_HEADER]]:
+; CHECK-NEXT: [[IV_K:%.*]] = phi i64 [ 0, %[[FOR_I_HEADER]] ], [ [[IV_K_NEXT:%.*]], %[[FOR_K_CLEANUP:.*]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV_L:%.*]] = phi i64 [ 0, %[[FOR_K_HEADER]] ], [ [[TMP0:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds nuw [128 x [128 x [128 x [128 x i32]]]], ptr @a, i64 [[IV_L]], i64 [[IV_K]], i64 [[IV_J]], i64 [[IV_I]]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
+; CHECK-NEXT: [[INC:%.*]] = add nuw nsw i32 [[VAL]], 1
+; CHECK-NEXT: store i32 [[INC]], ptr [[PTR]], align 4
+; CHECK-NEXT: [[TMP0]] = add nuw nsw i64 [[IV_L]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 128
+; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_K_CLEANUP]], label %[[FOR_BODY]]
+; CHECK: [[FOR_K_CLEANUP]]:
+; CHECK-NEXT: [[IV_K_NEXT]] = add nuw nsw i64 [[IV_K]], 1
+; CHECK-NEXT: [[EXITCOND_K:%.*]] = icmp eq i64 [[IV_K_NEXT]], 128
+; CHECK-NEXT: br i1 [[EXITCOND_K]], label %[[FOR_I_CLEANUP]], label %[[FOR_K_HEADER]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[FOR_J_CLEANUP]]:
+; CHECK-NEXT: [[IV_J_NEXT]] = add nuw nsw i64 [[IV_J]], 1
+; CHECK-NEXT: [[EXITCOND_J:%.*]] = icmp eq i64 [[IV_J_NEXT]], 128
+; CHECK-NEXT: br i1 [[EXITCOND_J]], label %[[EXIT:.*]], label %[[FOR_J_HEADER]]
+; CHECK: [[FOR_I_CLEANUP]]:
+; CHECK-NEXT: [[IV_I_NEXT]] = add nuw nsw i64 [[IV_I]], 1
+; CHECK-NEXT: [[EXITCOND_I:%.*]] = icmp eq i64 [[IV_I_NEXT]], 128
+; CHECK-NEXT: br i1 [[EXITCOND_I]], label %[[FOR_J_CLEANUP]], label %[[FOR_I_HEADER]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %iv.i = phi i64 [ 0, %entry ], [ %iv.i.next, %for.i.cleanup ]
+ br label %for.j.header
+
+for.j.header:
+ %iv.j = phi i64 [ 0, %for.i.header ], [ %iv.j.next, %for.j.cleanup ]
+ br label %for.k.header
+
+for.k.header:
+ %iv.k = phi i64 [ 0, %for.j.header ], [ %iv.k.next, %for.k.cleanup ]
+ br label %for.body
+
+for.body:
+ %iv.l = phi i64 [ 0, %for.k.header ], [ %iv.l.next, %for.body ]
+ %ptr = getelementptr inbounds nuw [128 x [128 x [128 x [128 x i32]]]], ptr @a, i64 %iv.l, i64 %iv.k, i64 %iv.j, i64 %iv.i
+ %val = load i32, ptr %ptr, align 4
+ %inc = add nuw nsw i32 %val, 1
+ store i32 %inc, ptr %ptr, align 4
+ %iv.l.next = add nuw nsw i64 %iv.l, 1
+ %exitcond.l = icmp eq i64 %iv.l.next, 128
+ br i1 %exitcond.l, label %for.k.cleanup, label %for.body
+
+for.k.cleanup:
+ %iv.k.next = add nuw nsw i64 %iv.k, 1
+ %exitcond.k = icmp eq i64 %iv.k.next, 128
+ br i1 %exitcond.k, label %for.j.cleanup, label %for.k.header, !llvm.loop !0
+
+for.j.cleanup:
+ %iv.j.next = add nuw nsw i64 %iv.j, 1
+ %exitcond.j = icmp eq i64 %iv.j.next, 128
+ br i1 %exitcond.j, label %for.i.cleanup, label %for.j.header
+
+for.i.cleanup:
+ %iv.i.next = add nuw nsw i64 %iv.i, 1
+ %exitcond.i = icmp eq i64 %iv.i.next, 128
+ br i1 %exitcond.i, label %exit, label %for.i.header
+
+exit:
+ ret void
+}
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.interchange.enable", i1 false}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.interchange.enable", i1 false}
+;.
diff --git a/llvm/test/Transforms/LoopInterchange/metadata-interruption.ll b/llvm/test/Transforms/LoopInterchange/metadata-interruption.ll
new file mode 100644
index 0000000000000..aba7fc39b23a4
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/metadata-interruption.ll
@@ -0,0 +1,94 @@
+; RUN: opt < %s -passes=loop-interchange -loop-interchange-only-when-forced=1 -pass-remarks-output=%t -disable-output
+; RUN: FileCheck -input-file %t %s
+
+; Test that the loop-interchange stops processing for some reason even though
+; some loops have metadata that enables the interchange.
+
+@a = dso_local local_unnamed_addr global [128 x [128 x [128 x i32]]] zeroinitializer, align 4
+
+; CHECK: --- !Missed
+; CHECK-NEXT: Pass: loop-interchange
+; CHECK-NEXT: Name: Innermost
+; CHECK-NEXT: Function: enable_innermost
+; CHECK-NEXT: Args:
+; CHECK-NEXT: - String: The metadata is invalid with an innermost loop.
+define void @enable_innermost() {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %iv.i = phi i64 [ 0, %entry ], [ %iv.i.next, %for.i.cleanup ]
+ br label %for.j.header
+
+for.j.header:
+ %iv.j = phi i64 [ 0, %for.i.header ], [ %iv.j.next, %for.j.cleanup ]
+ br label %for.body
+
+for.body:
+ %iv.k = phi i64 [ 0, %for.j.header ], [ %iv.k.next, %for.body ]
+ %ptr = getelementptr inbounds nuw [128 x [128 x [128 x i32]]], ptr @a, i64 %iv.k, i64 %iv.j, i64 %iv.i
+ %val = load i32, ptr %ptr, align 4
+ %inc = add nuw nsw i32 %val, 1
+ store i32 %inc, ptr %ptr, align 4
+ %iv.k.next = add nuw nsw i64 %iv.k, 1
+ %exitcond.k = icmp eq i64 %iv.k.next, 128
+ br i1 %exitcond.k, label %for.j.cleanup, label %for.body, !llvm.loop !0
+
+for.j.cleanup:
+ %iv.j.next = add nuw nsw i64 %iv.j, 1
+ %exitcond.j = icmp eq i64 %iv.j.next, 128
+ br i1 %exitcond.j, label %for.i.cleanup, label %for.j.header
+
+for.i.cleanup:
+ %iv.i.next = add nuw nsw i64 %iv.i, 1
+ %exitcond.i = icmp eq i64 %iv.i.next, 128
+ br i1 %exitcond.i, label %exit, label %for.i.header
+
+exit:
+ ret void
+}
+
+; CHECK: --- !Missed
+; CHECK-NEXT: Pass: loop-interchange
+; CHECK-NEXT: Name: AmbigiousOrder
+; CHECK-NEXT: Function: ambiguous_order
+; CHECK-NEXT: Args:
+; CHECK-NEXT: - String: The loops to be interchanged are overlapping.
+define void @ambiguous_order() {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %iv.i = phi i64 [ 0, %entry ], [ %iv.i.next, %for.i.cleanup ]
+ br label %for.j.header
+
+for.j.header:
+ %iv.j = phi i64 [ 0, %for.i.header ], [ %iv.j.next, %for.j.cleanup ]
+ br label %for.body
+
+for.body:
+ %iv.k = phi i64 [ 0, %for.j.header ], [ %iv.k.next, %for.body ]
+ %ptr = getelementptr inbounds nuw [128 x [128 x [128 x i32]]], ptr @a, i64 %iv.k, i64 %iv.j, i64 %iv.i
+ %val = load i32, ptr %ptr, align 4
+ %inc = add nuw nsw i32 %val, 1
+ store i32 %inc, ptr %ptr, align 4
+ %iv.k.next = add nuw nsw i64 %iv.k, 1
+ %exitcond.k = icmp eq i64 %iv.k.next, 128
+ br i1 %exitcond.k, label %for.j.cleanup, label %for.body
+
+for.j.cleanup:
+ %iv.j.next = add nuw nsw i64 %iv.j, 1
+ %exitcond.j = icmp eq i64 %iv.j.next, 128
+ br i1 %exitcond.j, label %for.i.cleanup, label %for.j.header, !llvm.loop !0
+
+for.i.cleanup:
+ %iv.i.next = add nuw nsw i64 %iv.i, 1
+ %exitcond.i = icmp eq i64 %iv.i.next, 128
+ br i1 %exitcond.i, label %exit, label %for.i.header, !llvm.loop !0
+
+exit:
+ ret void
+}
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.interchange.enable", i1 true}
diff --git a/llvm/test/Transforms/LoopInterchange/metadata.ll b/llvm/test/Transforms/LoopInterchange/metadata.ll
index 9838abb905a7e..e31bc3cb8e907 100644
--- a/llvm/test/Transforms/LoopInterchange/metadata.ll
+++ b/llvm/test/Transforms/LoopInterchange/metadata.ll
@@ -1,325 +1,107 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -passes=loop-interchange -loop-interchange-only-when-forced=0 --cache-line-size=64 -S < %s | FileCheck %s --check-prefix=DEFAULT-ON
-; RUN: opt -passes=loop-interchange -loop-interchange-only-when-forced=1 --cache-line-size=64 -S < %s | FileCheck %s --check-prefix=DEFAULT-OFF
+; RUN: opt -passes=loop-interchange -loop-interchange-only-when-forced=1 --cache-line-size=64 -S < %s | FileCheck %s
-; Test if the metadata works correctly. The code is as follows:
-;
-; #define N 4
-; int a[N][N][N][N];
-; int b[N][N][N][N];
-; void f() {
-; for (int i = 0; i < N; i++)
-; for (int j = 0; j < N; j++)
-; #pragma clang loop interchange(enable or disable)
-; for (int k = 0; k < N; k++)
-; for (int l = 0; l < N; l++)
-; a[l][k][j][i] += b[l][k][j][i];
-; }
-;
-; In the functions explicit_on and explicit_off, the values enable and disable
-; are specified in the pragma, respectively. If the
-; `loop-interchange-only-when-forced` is set to 0, the loop-interchange will be
-; performed to the loop nest unless it is explicitly disabled. If the value is
-; set to 1, the loop-interchange will be performed to the loop nest only when
-; it is explicitly enabled.
-
-@a = dso_local local_unnamed_addr global [2 x [2 x [2 x [2 x i32]]]] zeroinitializer, align 4
-@b = dso_local local_unnamed_addr global [2 x [2 x [2 x [2 x i32]]]] zeroinitializer, align 4
+@a = dso_local local_unnamed_addr global [128 x [128 x [128 x i32]]] zeroinitializer, align 4
-define void @explicit_on() {
-; DEFAULT-ON-LABEL: define void @explicit_on() {
-; DEFAULT-ON-NEXT: [[ENTRY:.*:]]
-; DEFAULT-ON-NEXT: br label %[[FOR_BODY12_PREHEADER:.*]]
-; DEFAULT-ON: [[FOR_COND1_PREHEADER_PREHEADER:.*]]:
-; DEFAULT-ON-NEXT: br label %[[FOR_COND1_PREHEADER:.*]]
-; DEFAULT-ON: [[FOR_COND1_PREHEADER]]:
-; DEFAULT-ON-NEXT: [[INDVARS_IV61:%.*]] = phi i64 [ [[INDVARS_IV_NEXT62:%.*]], %[[FOR_COND_CLEANUP3:.*]] ], [ 0, %[[FOR_COND1_PREHEADER_PREHEADER]] ]
-; DEFAULT-ON-NEXT: br label %[[FOR_BODY12_SPLIT1:.*]]
-; DEFAULT-ON: [[FOR_COND5_PREHEADER_PREHEADER:.*]]:
-; DEFAULT-ON-NEXT: br label %[[FOR_COND5_PREHEADER:.*]]
-; DEFAULT-ON: [[FOR_COND_CLEANUP3]]:
-; DEFAULT-ON-NEXT: [[INDVARS_IV_NEXT62]] = add nuw nsw i64 [[INDVARS_IV61]], 1
-; DEFAULT-ON-NEXT: [[EXITCOND64:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT62]], 2
-; DEFAULT-ON-NEXT: br i1 [[EXITCOND64]], label %[[FOR_COND1_PREHEADER]], label %[[FOR_COND_CLEANUP7_SPLIT:.*]]
-; DEFAULT-ON: [[FOR_COND_CLEANUP7:.*]]:
-; DEFAULT-ON-NEXT: [[INDVARS_IV_NEXT58:%.*]] = add nuw nsw i64 [[INDVARS_IV57:%.*]], 1
-; DEFAULT-ON-NEXT: [[EXITCOND60:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT58]], 2
-; DEFAULT-ON-NEXT: br label %[[FOR_COND_CLEANUP3]]
-; DEFAULT-ON: [[FOR_COND_CLEANUP7_SPLIT]]:
-; DEFAULT-ON-NEXT: [[TMP0:%.*]] = add nuw nsw i64 [[INDVARS_IV57]], 1
-; DEFAULT-ON-NEXT: [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 2
-; DEFAULT-ON-NEXT: br i1 [[TMP1]], label %[[FOR_COND5_PREHEADER]], label %[[FOR_COND_CLEANUP11_SPLIT:.*]]
-; DEFAULT-ON: [[FOR_COND_CLEANUP11:.*]]:
-; DEFAULT-ON-NEXT: [[INDVARS_IV_NEXT54:%.*]] = add nuw nsw i64 [[INDVARS_IV53:%.*]], 1
-; DEFAULT-ON-NEXT: [[EXITCOND56:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT54]], 2
-; DEFAULT-ON-NEXT: br label %[[FOR_COND_CLEANUP7]]
-; DEFAULT-ON: [[FOR_COND_CLEANUP11_SPLIT]]:
-; DEFAULT-ON-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[INDVARS_IV53]], 1
-; DEFAULT-ON-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP2]], 2
-; DEFAULT-ON-NEXT: br i1 [[TMP3]], label %[[FOR_COND9_PREHEADER:.*]], label %[[FOR_BODY12_SPLIT:.*]], !llvm.loop [[LOOP0:![0-9]+]]
-; DEFAULT-ON: [[FOR_BODY12:.*]]:
-; DEFAULT-ON-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP6:%.*]], %[[FOR_BODY12_SPLIT]] ], [ 0, %[[FOR_BODY12_PREHEADER]] ]
-; DEFAULT-ON-NEXT: br label %[[FOR_COND9_PREHEADER_PREHEADER:.*]]
-; DEFAULT-ON: [[FOR_BODY12_SPLIT1]]:
-; DEFAULT-ON-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds nuw [2 x [2 x [2 x [2 x i32]]]], ptr @b, i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV53]], i64 [[INDVARS_IV57]], i64 [[INDVARS_IV61]]
-; DEFAULT-ON-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX18]], align 4
-; DEFAULT-ON-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds nuw [2 x [2 x [2 x [2 x i32]]]], ptr @a, i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV53]], i64 [[INDVARS_IV57]], i64 [[INDVARS_IV61]]
-; DEFAULT-ON-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4
-; DEFAULT-ON-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP4]]
-; DEFAULT-ON-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX26]], align 4
-; DEFAULT-ON-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; DEFAULT-ON-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 2
-; DEFAULT-ON-NEXT: br label %[[FOR_COND_CLEANUP11]]
-; DEFAULT-ON: [[FOR_BODY12_SPLIT]]:
-; DEFAULT-ON-NEXT: [[TMP6]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; DEFAULT-ON-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 2
-; DEFAULT-ON-NEXT: br i1 [[TMP7]], label %[[FOR_BODY12]], label %[[FOR_COND_CLEANUP:.*]]
-; DEFAULT-ON: [[FOR_COND9_PREHEADER]]:
-; DEFAULT-ON-NEXT: [[INDVARS_IV53]] = phi i64 [ [[TMP2]], %[[FOR_COND_CLEANUP11_SPLIT]] ], [ 0, %[[FOR_COND9_PREHEADER_PREHEADER]] ]
-; DEFAULT-ON-NEXT: br label %[[FOR_COND5_PREHEADER_PREHEADER]]
-; DEFAULT-ON: [[FOR_BODY12_PREHEADER]]:
-; DEFAULT-ON-NEXT: br label %[[FOR_BODY12]]
-; DEFAULT-ON: [[FOR_COND5_PREHEADER]]:
-; DEFAULT-ON-NEXT: [[INDVARS_IV57]] = phi i64 [ [[TMP0]], %[[FOR_COND_CLEANUP7_SPLIT]] ], [ 0, %[[FOR_COND5_PREHEADER_PREHEADER]] ]
-; DEFAULT-ON-NEXT: br label %[[FOR_COND1_PREHEADER_PREHEADER]]
-; DEFAULT-ON: [[FOR_COND9_PREHEADER_PREHEADER]]:
-; DEFAULT-ON-NEXT: br label %[[FOR_COND9_PREHEADER]]
-; DEFAULT-ON: [[FOR_COND_CLEANUP]]:
-; DEFAULT-ON-NEXT: ret void
+; Check that the interchanges are applied in the expected order. The original
+; code looks as follows:
;
-; DEFAULT-OFF-LABEL: define void @explicit_on() {
-; DEFAULT-OFF-NEXT: [[ENTRY:.*]]:
-; DEFAULT-OFF-NEXT: br label %[[FOR_COND1_PREHEADER:.*]]
-; DEFAULT-OFF: [[FOR_COND1_PREHEADER]]:
-; DEFAULT-OFF-NEXT: [[INDVARS_IV61:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT62:%.*]], %[[FOR_COND_CLEANUP3:.*]] ]
-; DEFAULT-OFF-NEXT: br label %[[FOR_COND5_PREHEADER:.*]]
-; DEFAULT-OFF: [[FOR_COND_CLEANUP3]]:
-; DEFAULT-OFF-NEXT: [[INDVARS_IV_NEXT62]] = add nuw nsw i64 [[INDVARS_IV61]], 1
-; DEFAULT-OFF-NEXT: [[EXITCOND64:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT62]], 2
-; DEFAULT-OFF-NEXT: br i1 [[EXITCOND64]], label %[[FOR_COND1_PREHEADER]], label %[[FOR_COND_CLEANUP:.*]]
-; DEFAULT-OFF: [[FOR_COND_CLEANUP7:.*]]:
-; DEFAULT-OFF-NEXT: [[INDVARS_IV_NEXT58:%.*]] = add nuw nsw i64 [[INDVARS_IV57:%.*]], 1
-; DEFAULT-OFF-NEXT: [[EXITCOND60:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT58]], 2
-; DEFAULT-OFF-NEXT: br i1 [[EXITCOND60]], label %[[FOR_COND5_PREHEADER]], label %[[FOR_COND_CLEANUP3]]
-; DEFAULT-OFF: [[FOR_COND_CLEANUP11:.*]]:
-; DEFAULT-OFF-NEXT: [[INDVARS_IV_NEXT54:%.*]] = add nuw nsw i64 [[INDVARS_IV53:%.*]], 1
-; DEFAULT-OFF-NEXT: [[EXITCOND56:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT54]], 2
-; DEFAULT-OFF-NEXT: br i1 [[EXITCOND56]], label %[[FOR_COND9_PREHEADER:.*]], label %[[FOR_BODY12_SPLIT:.*]], !llvm.loop [[LOOP0:![0-9]+]]
-; DEFAULT-OFF: [[FOR_BODY12:.*]]:
-; DEFAULT-OFF-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP2:%.*]], %[[FOR_BODY12_SPLIT]] ], [ 0, %[[FOR_BODY12_PREHEADER:.*]] ]
-; DEFAULT-OFF-NEXT: br label %[[FOR_COND9_PREHEADER_PREHEADER:.*]]
-; DEFAULT-OFF: [[FOR_BODY12_SPLIT1:.*]]:
-; DEFAULT-OFF-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds nuw [2 x [2 x [2 x [2 x i32]]]], ptr @b, i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV53]], i64 [[INDVARS_IV57]], i64 [[INDVARS_IV61]]
-; DEFAULT-OFF-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX18]], align 4
-; DEFAULT-OFF-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds nuw [2 x [2 x [2 x [2 x i32]]]], ptr @a, i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV53]], i64 [[INDVARS_IV57]], i64 [[INDVARS_IV61]]
-; DEFAULT-OFF-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4
-; DEFAULT-OFF-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
-; DEFAULT-OFF-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX26]], align 4
-; DEFAULT-OFF-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; DEFAULT-OFF-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 2
-; DEFAULT-OFF-NEXT: br label %[[FOR_COND_CLEANUP11]]
-; DEFAULT-OFF: [[FOR_BODY12_SPLIT]]:
-; DEFAULT-OFF-NEXT: [[TMP2]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; DEFAULT-OFF-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP2]], 2
-; DEFAULT-OFF-NEXT: br i1 [[TMP3]], label %[[FOR_BODY12]], label %[[FOR_COND_CLEANUP7]]
-; DEFAULT-OFF: [[FOR_COND9_PREHEADER]]:
-; DEFAULT-OFF-NEXT: [[INDVARS_IV53]] = phi i64 [ [[INDVARS_IV_NEXT54]], %[[FOR_COND_CLEANUP11]] ], [ 0, %[[FOR_COND9_PREHEADER_PREHEADER]] ]
-; DEFAULT-OFF-NEXT: br label %[[FOR_BODY12_SPLIT1]]
-; DEFAULT-OFF: [[FOR_BODY12_PREHEADER]]:
-; DEFAULT-OFF-NEXT: br label %[[FOR_BODY12]]
-; DEFAULT-OFF: [[FOR_COND5_PREHEADER]]:
-; DEFAULT-OFF-NEXT: [[INDVARS_IV57]] = phi i64 [ 0, %[[FOR_COND1_PREHEADER]] ], [ [[INDVARS_IV_NEXT58]], %[[FOR_COND_CLEANUP7]] ]
-; DEFAULT-OFF-NEXT: br label %[[FOR_BODY12_PREHEADER]]
-; DEFAULT-OFF: [[FOR_COND9_PREHEADER_PREHEADER]]:
-; DEFAULT-OFF-NEXT: br label %[[FOR_COND9_PREHEADER]]
-; DEFAULT-OFF: [[FOR_COND_CLEANUP]]:
-; DEFAULT-OFF-NEXT: ret void
+; #pragma clang loop interchange(enable)
+; for (int j=0; j<128; j++)
+; #pragma clang loop interchange(enable)
+; for (int i=0; i<128; i++)
+; for (int k=0; k<128; k++)
+; a[k][j][i]++;
;
-entry:
- br label %for.cond1.preheader
-
-for.cond1.preheader:
- %indvars.iv61 = phi i64 [ 0, %entry ], [ %indvars.iv.next62, %for.cond.cleanup3 ]
- br label %for.cond5.preheader
-
-for.cond.cleanup3:
- %indvars.iv.next62 = add nuw nsw i64 %indvars.iv61, 1
- %exitcond64 = icmp ne i64 %indvars.iv.next62, 2
- br i1 %exitcond64, label %for.cond1.preheader, label %for.cond.cleanup
-
-for.cond.cleanup7:
- %indvars.iv.next58 = add nuw nsw i64 %indvars.iv57, 1
- %exitcond60 = icmp ne i64 %indvars.iv.next58, 2
- br i1 %exitcond60, label %for.cond5.preheader, label %for.cond.cleanup3
-
-for.cond.cleanup11:
- %indvars.iv.next54 = add nuw nsw i64 %indvars.iv53, 1
- %exitcond56 = icmp ne i64 %indvars.iv.next54, 2
- br i1 %exitcond56, label %for.cond9.preheader, label %for.cond.cleanup7, !llvm.loop !0
-
-for.body12:
- %indvars.iv = phi i64 [ 0, %for.cond9.preheader ], [ %indvars.iv.next, %for.body12 ]
- %arrayidx18 = getelementptr inbounds nuw [2 x [2 x [2 x [2 x i32]]]], ptr @b, i64 0, i64 %indvars.iv, i64 %indvars.iv53, i64 %indvars.iv57, i64 %indvars.iv61
- %0 = load i32, ptr %arrayidx18, align 4
- %arrayidx26 = getelementptr inbounds nuw [2 x [2 x [2 x [2 x i32]]]], ptr @a, i64 0, i64 %indvars.iv, i64 %indvars.iv53, i64 %indvars.iv57, i64 %indvars.iv61
- %1 = load i32, ptr %arrayidx26, align 4
- %add = add nsw i32 %1, %0
- store i32 %add, ptr %arrayidx26, align 4
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond = icmp ne i64 %indvars.iv.next, 2
- br i1 %exitcond, label %for.body12, label %for.cond.cleanup11
-
-for.cond9.preheader:
- %indvars.iv53 = phi i64 [ 0, %for.cond5.preheader ], [ %indvars.iv.next54, %for.cond.cleanup11 ]
- br label %for.body12
-
-for.cond5.preheader:
- %indvars.iv57 = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next58, %for.cond.cleanup7 ]
- br label %for.cond9.preheader
-
-for.cond.cleanup:
- ret void
-}
-
-define void @explicit_off() {
-; DEFAULT-ON-LABEL: define void @explicit_off() {
-; DEFAULT-ON-NEXT: [[ENTRY:.*:]]
-; DEFAULT-ON-NEXT: br label %[[FOR_COND5_PREHEADER_PREHEADER:.*]]
-; DEFAULT-ON: [[FOR_COND1_PREHEADER_PREHEADER:.*]]:
-; DEFAULT-ON-NEXT: br label %[[FOR_COND1_PREHEADER:.*]]
-; DEFAULT-ON: [[FOR_COND1_PREHEADER]]:
-; DEFAULT-ON-NEXT: [[INDVARS_IV61:%.*]] = phi i64 [ [[INDVARS_IV_NEXT62:%.*]], %[[FOR_COND_CLEANUP3:.*]] ], [ 0, %[[FOR_COND1_PREHEADER_PREHEADER]] ]
-; DEFAULT-ON-NEXT: br label %[[FOR_COND9_PREHEADER:.*]]
-; DEFAULT-ON: [[FOR_COND5_PREHEADER_PREHEADER]]:
-; DEFAULT-ON-NEXT: br label %[[FOR_COND5_PREHEADER:.*]]
-; DEFAULT-ON: [[FOR_COND_CLEANUP3]]:
-; DEFAULT-ON-NEXT: [[INDVARS_IV_NEXT62]] = add nuw nsw i64 [[INDVARS_IV61]], 1
-; DEFAULT-ON-NEXT: [[EXITCOND64:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT62]], 2
-; DEFAULT-ON-NEXT: br i1 [[EXITCOND64]], label %[[FOR_COND1_PREHEADER]], label %[[FOR_COND_CLEANUP7:.*]]
-; DEFAULT-ON: [[FOR_COND_CLEANUP7]]:
-; DEFAULT-ON-NEXT: [[INDVARS_IV_NEXT58:%.*]] = add nuw nsw i64 [[INDVARS_IV57:%.*]], 1
-; DEFAULT-ON-NEXT: [[EXITCOND60:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT58]], 2
-; DEFAULT-ON-NEXT: br i1 [[EXITCOND60]], label %[[FOR_COND5_PREHEADER]], label %[[FOR_COND_CLEANUP:.*]]
-; DEFAULT-ON: [[FOR_COND_CLEANUP11:.*]]:
-; DEFAULT-ON-NEXT: [[INDVARS_IV_NEXT54:%.*]] = add nuw nsw i64 [[INDVARS_IV53:%.*]], 1
-; DEFAULT-ON-NEXT: [[EXITCOND56:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT54]], 2
-; DEFAULT-ON-NEXT: br i1 [[EXITCOND56]], label %[[FOR_COND9_PREHEADER]], label %[[FOR_COND_CLEANUP3]], !llvm.loop [[LOOP2:![0-9]+]]
-; DEFAULT-ON: [[FOR_BODY12:.*]]:
-; DEFAULT-ON-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_COND9_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY12]] ]
-; DEFAULT-ON-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds nuw [2 x [2 x [2 x [2 x i32]]]], ptr @b, i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV53]], i64 [[INDVARS_IV57]], i64 [[INDVARS_IV61]]
-; DEFAULT-ON-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX18]], align 4
-; DEFAULT-ON-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds nuw [2 x [2 x [2 x [2 x i32]]]], ptr @a, i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV53]], i64 [[INDVARS_IV57]], i64 [[INDVARS_IV61]]
-; DEFAULT-ON-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4
-; DEFAULT-ON-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
-; DEFAULT-ON-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX26]], align 4
-; DEFAULT-ON-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; DEFAULT-ON-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 2
-; DEFAULT-ON-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY12]], label %[[FOR_COND_CLEANUP11]]
-; DEFAULT-ON: [[FOR_COND9_PREHEADER]]:
-; DEFAULT-ON-NEXT: [[INDVARS_IV53]] = phi i64 [ 0, %[[FOR_COND1_PREHEADER]] ], [ [[INDVARS_IV_NEXT54]], %[[FOR_COND_CLEANUP11]] ]
-; DEFAULT-ON-NEXT: br label %[[FOR_BODY12]]
-; DEFAULT-ON: [[FOR_COND5_PREHEADER]]:
-; DEFAULT-ON-NEXT: [[INDVARS_IV57]] = phi i64 [ [[INDVARS_IV_NEXT58]], %[[FOR_COND_CLEANUP7]] ], [ 0, %[[FOR_COND5_PREHEADER_PREHEADER]] ]
-; DEFAULT-ON-NEXT: br label %[[FOR_COND1_PREHEADER_PREHEADER]]
-; DEFAULT-ON: [[FOR_COND_CLEANUP]]:
-; DEFAULT-ON-NEXT: ret void
+; First, the interchange is applied to the j-loop and the k-loop. Then the
+; follow-up metadata is attached to the outermost loop, and the interchange is
+; applied to the i-loop and the k-loop.
;
-; DEFAULT-OFF-LABEL: define void @explicit_off() {
-; DEFAULT-OFF-NEXT: [[ENTRY:.*]]:
-; DEFAULT-OFF-NEXT: br label %[[FOR_COND1_PREHEADER:.*]]
-; DEFAULT-OFF: [[FOR_COND1_PREHEADER]]:
-; DEFAULT-OFF-NEXT: [[INDVARS_IV61:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT62:%.*]], %[[FOR_COND_CLEANUP3:.*]] ]
-; DEFAULT-OFF-NEXT: br label %[[FOR_COND5_PREHEADER:.*]]
-; DEFAULT-OFF: [[FOR_COND_CLEANUP3]]:
-; DEFAULT-OFF-NEXT: [[INDVARS_IV_NEXT62]] = add nuw nsw i64 [[INDVARS_IV61]], 1
-; DEFAULT-OFF-NEXT: [[EXITCOND64:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT62]], 2
-; DEFAULT-OFF-NEXT: br i1 [[EXITCOND64]], label %[[FOR_COND1_PREHEADER]], label %[[FOR_COND_CLEANUP:.*]]
-; DEFAULT-OFF: [[FOR_COND_CLEANUP7:.*]]:
-; DEFAULT-OFF-NEXT: [[INDVARS_IV_NEXT58:%.*]] = add nuw nsw i64 [[INDVARS_IV57:%.*]], 1
-; DEFAULT-OFF-NEXT: [[EXITCOND60:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT58]], 2
-; DEFAULT-OFF-NEXT: br i1 [[EXITCOND60]], label %[[FOR_COND5_PREHEADER]], label %[[FOR_COND_CLEANUP3]]
-; DEFAULT-OFF: [[FOR_COND_CLEANUP11:.*]]:
-; DEFAULT-OFF-NEXT: [[INDVARS_IV_NEXT54:%.*]] = add nuw nsw i64 [[INDVARS_IV53:%.*]], 1
-; DEFAULT-OFF-NEXT: [[EXITCOND56:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT54]], 2
-; DEFAULT-OFF-NEXT: br i1 [[EXITCOND56]], label %[[FOR_COND9_PREHEADER:.*]], label %[[FOR_COND_CLEANUP7]], !llvm.loop [[LOOP2:![0-9]+]]
-; DEFAULT-OFF: [[FOR_BODY12:.*]]:
-; DEFAULT-OFF-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_COND9_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY12]] ]
-; DEFAULT-OFF-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds nuw [2 x [2 x [2 x [2 x i32]]]], ptr @b, i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV53]], i64 [[INDVARS_IV57]], i64 [[INDVARS_IV61]]
-; DEFAULT-OFF-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX18]], align 4
-; DEFAULT-OFF-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds nuw [2 x [2 x [2 x [2 x i32]]]], ptr @a, i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV53]], i64 [[INDVARS_IV57]], i64 [[INDVARS_IV61]]
-; DEFAULT-OFF-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4
-; DEFAULT-OFF-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
-; DEFAULT-OFF-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX26]], align 4
-; DEFAULT-OFF-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; DEFAULT-OFF-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 2
-; DEFAULT-OFF-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY12]], label %[[FOR_COND_CLEANUP11]]
-; DEFAULT-OFF: [[FOR_COND9_PREHEADER]]:
-; DEFAULT-OFF-NEXT: [[INDVARS_IV53]] = phi i64 [ 0, %[[FOR_COND5_PREHEADER]] ], [ [[INDVARS_IV_NEXT54]], %[[FOR_COND_CLEANUP11]] ]
-; DEFAULT-OFF-NEXT: br label %[[FOR_BODY12]]
-; DEFAULT-OFF: [[FOR_COND5_PREHEADER]]:
-; DEFAULT-OFF-NEXT: [[INDVARS_IV57]] = phi i64 [ 0, %[[FOR_COND1_PREHEADER]] ], [ [[INDVARS_IV_NEXT58]], %[[FOR_COND_CLEANUP7]] ]
-; DEFAULT-OFF-NEXT: br label %[[FOR_COND9_PREHEADER]]
-; DEFAULT-OFF: [[FOR_COND_CLEANUP]]:
-; DEFAULT-OFF-NEXT: ret void
+define void @f() {
+; CHECK-LABEL: define void @f() {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[FOR_I_HEADER:.*]]
+; CHECK: [[FOR_I_HEADER]]:
+; CHECK-NEXT: [[IV_I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_I_NEXT:%.*]], %[[FOR_I_CLEANUP:.*]] ]
+; CHECK-NEXT: br label %[[FOR_J_HEADER:.*]]
+; CHECK: [[FOR_J_HEADER]]:
+; CHECK-NEXT: [[IV_J:%.*]] = phi i64 [ 0, %[[FOR_I_HEADER]] ], [ [[IV_J_NEXT:%.*]], %[[FOR_J_CLEANUP:.*]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV_K:%.*]] = phi i64 [ 0, %[[FOR_J_HEADER]] ], [ [[TMP0:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds nuw [128 x [128 x [128 x i32]]], ptr @a, i64 [[IV_K]], i64 [[IV_J]], i64 [[IV_I]]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
+; CHECK-NEXT: [[INC:%.*]] = add nuw nsw i32 [[VAL]], 1
+; CHECK-NEXT: store i32 [[INC]], ptr [[PTR]], align 4
+; CHECK-NEXT: [[TMP0]] = add nuw nsw i64 [[IV_K]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 128
+; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_J_CLEANUP]], label %[[FOR_BODY]]
+; CHECK: [[FOR_J_CLEANUP]]:
+; CHECK-NEXT: [[IV_J_NEXT]] = add nuw nsw i64 [[IV_J]], 1
+; CHECK-NEXT: [[EXITCOND_J:%.*]] = icmp eq i64 [[IV_J_NEXT]], 128
+; CHECK-NEXT: br i1 [[EXITCOND_J]], label %[[FOR_I_CLEANUP]], label %[[FOR_J_HEADER]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[FOR_I_CLEANUP]]:
+; CHECK-NEXT: [[IV_I_NEXT]] = add nuw nsw i64 [[IV_I]], 1
+; CHECK-NEXT: [[EXITCOND_I:%.*]] = icmp eq i64 [[IV_I_NEXT]], 128
+; CHECK-NEXT: br i1 [[EXITCOND_I]], label %[[EXIT:.*]], label %[[FOR_I_HEADER]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
;
entry:
- br label %for.cond1.preheader
-
-for.cond1.preheader:
- %indvars.iv61 = phi i64 [ 0, %entry ], [ %indvars.iv.next62, %for.cond.cleanup3 ]
- br label %for.cond5.preheader
-
-for.cond.cleanup3:
- %indvars.iv.next62 = add nuw nsw i64 %indvars.iv61, 1
- %exitcond64 = icmp ne i64 %indvars.iv.next62, 2
- br i1 %exitcond64, label %for.cond1.preheader, label %for.cond.cleanup
-
-for.cond.cleanup7:
- %indvars.iv.next58 = add nuw nsw i64 %indvars.iv57, 1
- %exitcond60 = icmp ne i64 %indvars.iv.next58, 2
- br i1 %exitcond60, label %for.cond5.preheader, label %for.cond.cleanup3
-
-for.cond.cleanup11:
- %indvars.iv.next54 = add nuw nsw i64 %indvars.iv53, 1
- %exitcond56 = icmp ne i64 %indvars.iv.next54, 2
- br i1 %exitcond56, label %for.cond9.preheader, label %for.cond.cleanup7, !llvm.loop !2
-
-for.body12:
- %indvars.iv = phi i64 [ 0, %for.cond9.preheader ], [ %indvars.iv.next, %for.body12 ]
- %arrayidx18 = getelementptr inbounds nuw [2 x [2 x [2 x [2 x i32]]]], ptr @b, i64 0, i64 %indvars.iv, i64 %indvars.iv53, i64 %indvars.iv57, i64 %indvars.iv61
- %0 = load i32, ptr %arrayidx18, align 4
- %arrayidx26 = getelementptr inbounds nuw [2 x [2 x [2 x [2 x i32]]]], ptr @a, i64 0, i64 %indvars.iv, i64 %indvars.iv53, i64 %indvars.iv57, i64 %indvars.iv61
- %1 = load i32, ptr %arrayidx26, align 4
- %add = add nsw i32 %1, %0
- store i32 %add, ptr %arrayidx26, align 4
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond = icmp ne i64 %indvars.iv.next, 2
- br i1 %exitcond, label %for.body12, label %for.cond.cleanup11
-
-for.cond9.preheader:
- %indvars.iv53 = phi i64 [ 0, %for.cond5.preheader ], [ %indvars.iv.next54, %for.cond.cleanup11 ]
- br label %for.body12
-
-for.cond5.preheader:
- %indvars.iv57 = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next58, %for.cond.cleanup7 ]
- br label %for.cond9.preheader
-
-for.cond.cleanup:
+ br label %for.i.header
+
+for.i.header:
+ %iv.i = phi i64 [ 0, %entry ], [ %iv.i.next, %for.i.cleanup ]
+ br label %for.j.header
+
+for.j.header:
+ %iv.j = phi i64 [ 0, %for.i.header ], [ %iv.j.next, %for.j.cleanup ]
+ br label %for.body
+
+for.body:
+ %iv.k = phi i64 [ 0, %for.j.header ], [ %iv.k.next, %for.body ]
+ %ptr = getelementptr inbounds nuw [128 x [128 x [128 x i32]]], ptr @a, i64 %iv.k, i64 %iv.j, i64 %iv.i
+ %val = load i32, ptr %ptr, align 4
+ %inc = add nuw nsw i32 %val, 1
+ store i32 %inc, ptr %ptr, align 4
+ %iv.k.next = add nuw nsw i64 %iv.k, 1
+ %exitcond.k = icmp eq i64 %iv.k.next, 128
+ br i1 %exitcond.k, label %for.j.cleanup, label %for.body
+
+for.j.cleanup:
+ %iv.j.next = add nuw nsw i64 %iv.j, 1
+ %exitcond.j = icmp eq i64 %iv.j.next, 128
+ br i1 %exitcond.j, label %for.i.cleanup, label %for.j.header, !llvm.loop !0
+
+for.i.cleanup:
+ %iv.i.next = add nuw nsw i64 %iv.i, 1
+ %exitcond.i = icmp eq i64 %iv.i.next, 128
+ br i1 %exitcond.i, label %exit, label %for.i.header, !llvm.loop !5
+
+exit:
ret void
}
-!0 = distinct !{!0, !1}
+!0 = distinct !{!0, !1, !2, !3, !4, !5}
!1 = !{!"llvm.loop.interchange.enable", i1 true}
-!2 = distinct !{!2, !3}
-!3 = !{!"llvm.loop.interchange.enable", i1 false}
-;.
-; DEFAULT-ON: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
-; DEFAULT-ON: [[META1]] = !{!"llvm.loop.interchange.enable", i1 true}
-; DEFAULT-ON: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]}
-; DEFAULT-ON: [[META3]] = !{!"llvm.loop.interchange.enable", i1 false}
+!2 = !{!"llvm.loop.interchange.followup_all", !{!"FolloupAll"}}
+!3 = !{!"llvm.loop.interchange.followup_inner", !{!"FollowupInner0"}}
+!4 = !{!"llvm.loop.interchange.followup_outer", !{!"FollowupOuter0"}}
+!5 = !{!"llvm.loop.interchange.followup_next_outer", !1, !2, !6, !7}
+!6 = !{!"llvm.loop.interchange.followup_inner", !{!"FollowupInner1"}}
+!7 = !{!"llvm.loop.interchange.followup_outer", !{!"FollowupOuter1"}}
;.
-; DEFAULT-OFF: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
-; DEFAULT-OFF: [[META1]] = !{!"llvm.loop.interchange.enable", i1 true}
-; DEFAULT-OFF: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]}
-; DEFAULT-OFF: [[META3]] = !{!"llvm.loop.interchange.enable", i1 false}
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]], [[META4:![0-9]+]], [[META6:![0-9]+]], [[LOOP8]]}
+; CHECK: [[META1]] = !{!"llvm.loop.interchange.enable", i1 true}
+; CHECK: [[META2]] = !{!"llvm.loop.interchange.followup_all", [[META3:![0-9]+]]}
+; CHECK: [[META3]] = !{!"FolloupAll"}
+; CHECK: [[META4]] = !{!"llvm.loop.interchange.followup_inner", [[META5:![0-9]+]]}
+; CHECK: [[META5]] = !{!"FollowupInner0"}
+; CHECK: [[META6]] = !{!"llvm.loop.interchange.followup_outer", [[META7:![0-9]+]]}
+; CHECK: [[META7]] = !{!"FollowupOuter0"}
+; CHECK: [[LOOP8]] = !{!"llvm.loop.interchange.followup_next_outer", [[META1]], [[META2]], [[META9:![0-9]+]], [[META11:![0-9]+]]}
+; CHECK: [[META9]] = !{!"llvm.loop.interchange.followup_inner", [[META10:![0-9]+]]}
+; CHECK: [[META10]] = !{!"FollowupInner1"}
+; CHECK: [[META11]] = !{!"llvm.loop.interchange.followup_outer", [[META12:![0-9]+]]}
+; CHECK: [[META12]] = !{!"FollowupOuter1"}
;.
More information about the llvm-commits
mailing list