[llvm] [MachineLICM] Allow hoisting loads from invariant address (PR #70796)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 31 05:49:42 PDT 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Igor Kirillov (igogo-x86)
<details>
<summary>Changes</summary>
Sometimes, loads can appear in a loop after the LICM pass has run for
the final time. For example, the ExpandMemCmp pass creates loads in a loop,
and one of the operands may be an invariant address.
This patch extends the pre-regalloc stage of MachineLICM by allowing it to
hoist invariant loads out of loops that don't contain any stores or calls
and that allow loads to be reordered.
---
Patch is 35.72 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/70796.diff
11 Files Affected:
- (modified) llvm/lib/CodeGen/MachineLICM.cpp (+65-14)
- (added) llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll (+433)
- (modified) llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sinksplat.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/zext-to-tbl.ll (+2-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll (+24-24)
- (modified) llvm/test/CodeGen/X86/2009-04-25-CoalescerBug.ll (+4-3)
- (modified) llvm/test/CodeGen/X86/block-placement.ll (+1-2)
- (modified) llvm/test/CodeGen/X86/fma-commute-loop.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/pr49393.ll (+8-7)
- (modified) llvm/test/CodeGen/X86/pr53842.ll (+7-7)
``````````diff
diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp
index e29f28ecaea0dce..f1af74328f0025a 100644
--- a/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/llvm/lib/CodeGen/MachineLICM.cpp
@@ -72,6 +72,11 @@ static cl::opt<bool>
HoistConstStores("hoist-const-stores",
cl::desc("Hoist invariant stores"),
cl::init(true), cl::Hidden);
+
+static cl::opt<bool> HoistConstLoads("hoist-const-loads",
+ cl::desc("Hoist invariant loads"),
+ cl::init(true), cl::Hidden);
+
// The default threshold of 100 (i.e. if target block is 100 times hotter)
// is based on empirical data on a single target and is subject to tuning.
static cl::opt<unsigned>
@@ -222,9 +227,11 @@ namespace {
void AddToLiveIns(MCRegister Reg, MachineLoop *CurLoop);
- bool IsLICMCandidate(MachineInstr &I, MachineLoop *CurLoop);
+ bool IsLICMCandidate(MachineInstr &I, MachineLoop *CurLoop,
+ bool SafeToMoveLoad);
- bool IsLoopInvariantInst(MachineInstr &I, MachineLoop *CurLoop);
+ bool IsLoopInvariantInst(MachineInstr &I, MachineLoop *CurLoop,
+ bool SafeToMoveLoad);
bool HasLoopPHIUse(const MachineInstr *MI, MachineLoop *CurLoop);
@@ -277,7 +284,7 @@ namespace {
bool MayCSE(MachineInstr *MI);
unsigned Hoist(MachineInstr *MI, MachineBasicBlock *Preheader,
- MachineLoop *CurLoop);
+ MachineLoop *CurLoop, bool SafeToMoveLoad);
void InitCSEMap(MachineBasicBlock *BB);
@@ -494,7 +501,7 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs,
// operands. FIXME: Consider unfold load folding instructions.
if (Def && !RuledOut) {
int FI = std::numeric_limits<int>::min();
- if ((!HasNonInvariantUse && IsLICMCandidate(*MI, CurLoop)) ||
+ if ((!HasNonInvariantUse && IsLICMCandidate(*MI, CurLoop, false)) ||
(TII->isLoadFromStackSlot(*MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
Candidates.push_back(CandidateInfo(MI, Def, FI));
}
@@ -772,6 +779,32 @@ void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN,
BackTrace.clear();
InitRegPressure(Preheader);
+ // Compute information about whether it is allowed to move load instruction
+ // out of the current loop or one of the inner loops
+ SmallDenseMap<MachineLoop *, bool> AllowedToHoistLoads;
+ if (HoistConstLoads) {
+ SmallVector<MachineLoop *, 4> Worklist{CurLoop};
+
+ while (!Worklist.empty()) {
+ auto *L = Worklist.pop_back_val();
+ AllowedToHoistLoads[L] = true;
+ Worklist.insert(Worklist.end(), L->getSubLoops().begin(),
+ L->getSubLoops().end());
+ }
+
+ for (auto *MBB : CurLoop->blocks()) {
+ for (auto &MI : *MBB) {
+ if (MI.mayStore() || MI.isCall() || (MI.mayLoad() && MI.hasOrderedMemoryRef())) {
+ for (MachineLoop *L = MLI->getLoopFor(MI.getParent()); L != CurLoop;
+ L = L->getParentLoop())
+ AllowedToHoistLoads[L] = false;
+ AllowedToHoistLoads[CurLoop] = false;
+ break;
+ }
+ }
+ }
+ }
+
// Now perform LICM.
for (MachineDomTreeNode *Node : Scopes) {
MachineBasicBlock *MBB = Node->getBlock();
@@ -780,9 +813,23 @@ void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN,
// Process the block
SpeculationState = SpeculateUnknown;
+
+ auto CanMoveLoad = [](MachineLoop *L) -> bool {
+ dbgs() << L << "\n";
+ for (auto *MBB : L->blocks()) {
+ for (auto &MI : *MBB) {
+ // Taken from MachineInstr::isSafeToMove
+ if (MI.mayStore() || MI.isCall() || (MI.mayLoad() && MI.hasOrderedMemoryRef()))
+ return false;
+ }
+ }
+ return true;
+ };
+
+ bool SafeToMoveLoad = HoistConstLoads && AllowedToHoistLoads[CurLoop];
for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
unsigned HoistRes = HoistResult::NotHoisted;
- HoistRes = Hoist(&MI, Preheader, CurLoop);
+ HoistRes = Hoist(&MI, Preheader, CurLoop, SafeToMoveLoad);
if (HoistRes & HoistResult::NotHoisted) {
// We have failed to hoist MI to outermost loop's preheader. If MI is in
// a subloop, try to hoist it to subloop's preheader.
@@ -793,9 +840,12 @@ void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN,
while (!InnerLoopWorkList.empty()) {
MachineLoop *InnerLoop = InnerLoopWorkList.pop_back_val();
+ bool SafeToMoveLoadInner =
+ HoistConstLoads && AllowedToHoistLoads[InnerLoop];
MachineBasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
if (InnerLoopPreheader) {
- HoistRes = Hoist(&MI, InnerLoopPreheader, InnerLoop);
+ HoistRes =
+ Hoist(&MI, InnerLoopPreheader, InnerLoop, SafeToMoveLoadInner);
if (HoistRes & HoistResult::Hoisted)
break;
}
@@ -990,9 +1040,10 @@ static bool isCopyFeedingInvariantStore(const MachineInstr &MI,
/// Returns true if the instruction may be a suitable candidate for LICM.
/// e.g. If the instruction is a call, then it's obviously not safe to hoist it.
-bool MachineLICMBase::IsLICMCandidate(MachineInstr &I, MachineLoop *CurLoop) {
+bool MachineLICMBase::IsLICMCandidate(MachineInstr &I, MachineLoop *CurLoop,
+ bool SafeToMoveLoad) {
// Check if it's safe to move the instruction.
- bool DontMoveAcrossStore = true;
+ bool DontMoveAcrossStore = !SafeToMoveLoad;
if ((!I.isSafeToMove(AA, DontMoveAcrossStore)) &&
!(HoistConstStores && isInvariantStore(I, TRI, MRI))) {
LLVM_DEBUG(dbgs() << "LICM: Instruction not safe to move.\n");
@@ -1025,9 +1076,9 @@ bool MachineLICMBase::IsLICMCandidate(MachineInstr &I, MachineLoop *CurLoop) {
}
/// Returns true if the instruction is loop invariant.
-bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I,
- MachineLoop *CurLoop) {
- if (!IsLICMCandidate(I, CurLoop)) {
+bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I, MachineLoop *CurLoop,
+ bool SafeToMoveLoad) {
+ if (!IsLICMCandidate(I, CurLoop, SafeToMoveLoad)) {
LLVM_DEBUG(dbgs() << "LICM: Instruction not a LICM candidate\n");
return false;
}
@@ -1305,7 +1356,7 @@ MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI,
MBB->insert(Pos, NewMIs[1]);
// If unfolding produced a load that wasn't loop-invariant or profitable to
// hoist, discard the new instructions and bail.
- if (!IsLoopInvariantInst(*NewMIs[0], CurLoop) ||
+ if (!IsLoopInvariantInst(*NewMIs[0], CurLoop, /*SafeToMoveLoad=*/false) ||
!IsProfitableToHoist(*NewMIs[0], CurLoop)) {
NewMIs[0]->eraseFromParent();
NewMIs[1]->eraseFromParent();
@@ -1432,7 +1483,7 @@ bool MachineLICMBase::MayCSE(MachineInstr *MI) {
/// that are safe to hoist, this instruction is called to do the dirty work.
/// It returns true if the instruction is hoisted.
unsigned MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader,
- MachineLoop *CurLoop) {
+ MachineLoop *CurLoop, bool SafeToMoveLoad) {
MachineBasicBlock *SrcBlock = MI->getParent();
// Disable the instruction hoisting due to block hotness
@@ -1444,7 +1495,7 @@ unsigned MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader,
}
// First check whether we should hoist this instruction.
bool HasExtractHoistableLoad = false;
- if (!IsLoopInvariantInst(*MI, CurLoop) ||
+ if (!IsLoopInvariantInst(*MI, CurLoop, SafeToMoveLoad) ||
!IsProfitableToHoist(*MI, CurLoop)) {
// If not, try unfolding a hoistable load.
MI = ExtractHoistableLoad(MI, CurLoop);
diff --git a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll
new file mode 100644
index 000000000000000..6b76b03fe00fc81
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll
@@ -0,0 +1,433 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s
+
+define i64 @one_dimensional(ptr %a, ptr %b, i64 %N, i64 %M, i64 %K) {
+; CHECK-LABEL: one_dimensional:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cbz x2, .LBB0_4
+; CHECK-NEXT: // %bb.1: // %for.body.preheader
+; CHECK-NEXT: ldr w9, [x1]
+; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: .LBB0_2: // %for.body
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldr x10, [x0], #8
+; CHECK-NEXT: ldr w10, [x10]
+; CHECK-NEXT: cmp w10, w9
+; CHECK-NEXT: cinc x8, x8, ne
+; CHECK-NEXT: subs x2, x2, #1
+; CHECK-NEXT: b.ne .LBB0_2
+; CHECK-NEXT: // %bb.3: // %for.cond.cleanup
+; CHECK-NEXT: mov x0, x8
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB0_4:
+; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov x0, x8
+; CHECK-NEXT: ret
+entry:
+ %cmp4 = icmp eq i64 %N, 0
+ br i1 %cmp4, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %sum.0.lcssa = phi i64 [ 0, %entry ], [ %spec.select, %for.body ]
+ ret i64 %sum.0.lcssa
+
+for.body: ; preds = %entry, %for.body
+ %i.06 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+ %sum.05 = phi i64 [ %spec.select, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds ptr, ptr %a, i64 %i.06
+ %0 = load ptr, ptr %arrayidx, align 8
+ %bcmp = tail call i32 @bcmp(ptr %0, ptr %b, i64 4)
+ %tobool = icmp ne i32 %bcmp, 0
+ %add = zext i1 %tobool to i64
+ %spec.select = add i64 %sum.05, %add
+ %inc = add nuw i64 %i.06, 1
+ %exitcond = icmp eq i64 %inc, %N
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+define i64 @two_dimensional(ptr %a, ptr %b, i64 %N, i64 %M, i64 %K) {
+; CHECK-LABEL: two_dimensional:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: cbz x2, .LBB1_6
+; CHECK-NEXT: // %bb.1: // %entry
+; CHECK-NEXT: cbz x3, .LBB1_6
+; CHECK-NEXT: // %bb.2: // %for.cond1.preheader.preheader
+; CHECK-NEXT: ldr w10, [x1]
+; CHECK-NEXT: mov x9, xzr
+; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: .LBB1_3: // %for.cond1.preheader
+; CHECK-NEXT: // =>This Loop Header: Depth=1
+; CHECK-NEXT: // Child Loop BB1_4 Depth 2
+; CHECK-NEXT: ldr x11, [x0, x9, lsl #3]
+; CHECK-NEXT: mov x12, x3
+; CHECK-NEXT: .LBB1_4: // %for.body4
+; CHECK-NEXT: // Parent Loop BB1_3 Depth=1
+; CHECK-NEXT: // => This Inner Loop Header: Depth=2
+; CHECK-NEXT: ldr x13, [x11], #8
+; CHECK-NEXT: ldr w13, [x13]
+; CHECK-NEXT: cmp w13, w10
+; CHECK-NEXT: cinc x8, x8, ne
+; CHECK-NEXT: subs x12, x12, #1
+; CHECK-NEXT: b.ne .LBB1_4
+; CHECK-NEXT: // %bb.5: // %for.cond1.for.cond.cleanup3_crit_edge
+; CHECK-NEXT: // in Loop: Header=BB1_3 Depth=1
+; CHECK-NEXT: add x9, x9, #1
+; CHECK-NEXT: cmp x9, x2
+; CHECK-NEXT: b.ne .LBB1_3
+; CHECK-NEXT: .LBB1_6: // %for.cond.cleanup
+; CHECK-NEXT: mov x0, x8
+; CHECK-NEXT: ret
+entry:
+ %cmp17 = icmp eq i64 %N, 0
+ %cmp214 = icmp eq i64 %M, 0
+ %or.cond = or i1 %cmp17, %cmp214
+ br i1 %or.cond, label %for.cond.cleanup, label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.cond1.for.cond.cleanup3_crit_edge
+ %i.019 = phi i64 [ %inc7, %for.cond1.for.cond.cleanup3_crit_edge ], [ 0, %entry ]
+ %sum.018 = phi i64 [ %spec.select, %for.cond1.for.cond.cleanup3_crit_edge ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds ptr, ptr %a, i64 %i.019
+ %0 = load ptr, ptr %arrayidx, align 8
+ br label %for.body4
+
+for.body4: ; preds = %for.cond1.preheader, %for.body4
+ %j.016 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body4 ]
+ %sum.115 = phi i64 [ %sum.018, %for.cond1.preheader ], [ %spec.select, %for.body4 ]
+ %arrayidx5 = getelementptr inbounds ptr, ptr %0, i64 %j.016
+ %1 = load ptr, ptr %arrayidx5, align 8
+ %bcmp = tail call i32 @bcmp(ptr %1, ptr %b, i64 4)
+ %tobool = icmp ne i32 %bcmp, 0
+ %add = zext i1 %tobool to i64
+ %spec.select = add i64 %sum.115, %add
+ %inc = add nuw i64 %j.016, 1
+ %exitcond = icmp eq i64 %inc, %M
+ br i1 %exitcond, label %for.cond1.for.cond.cleanup3_crit_edge, label %for.body4
+
+for.cond1.for.cond.cleanup3_crit_edge: ; preds = %for.body4
+ %inc7 = add nuw i64 %i.019, 1
+ %exitcond22 = icmp eq i64 %inc7, %N
+ br i1 %exitcond22, label %for.cond.cleanup, label %for.cond1.preheader
+
+for.cond.cleanup: ; preds = %for.cond1.for.cond.cleanup3_crit_edge, %entry
+ %sum.0.lcssa = phi i64 [ 0, %entry ], [ %spec.select, %for.cond1.for.cond.cleanup3_crit_edge ]
+ ret i64 %sum.0.lcssa
+}
+
+define i64 @three_dimensional_middle(ptr %a, ptr %b, i64 %N, i64 %M, i64 %K) {
+; CHECK-LABEL: three_dimensional_middle:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov x8, x0
+; CHECK-NEXT: mov x0, xzr
+; CHECK-NEXT: cbz x2, .LBB2_9
+; CHECK-NEXT: // %bb.1: // %entry
+; CHECK-NEXT: cbz x3, .LBB2_9
+; CHECK-NEXT: // %bb.2: // %entry
+; CHECK-NEXT: cbz x4, .LBB2_9
+; CHECK-NEXT: // %bb.3: // %for.cond1.preheader.preheader
+; CHECK-NEXT: mov x9, xzr
+; CHECK-NEXT: mov x0, xzr
+; CHECK-NEXT: .LBB2_4: // %for.cond1.preheader
+; CHECK-NEXT: // =>This Loop Header: Depth=1
+; CHECK-NEXT: // Child Loop BB2_5 Depth 2
+; CHECK-NEXT: // Child Loop BB2_6 Depth 3
+; CHECK-NEXT: ldr x10, [x8, x9, lsl #3]
+; CHECK-NEXT: mov x11, xzr
+; CHECK-NEXT: .LBB2_5: // %for.cond5.preheader
+; CHECK-NEXT: // Parent Loop BB2_4 Depth=1
+; CHECK-NEXT: // => This Loop Header: Depth=2
+; CHECK-NEXT: // Child Loop BB2_6 Depth 3
+; CHECK-NEXT: lsl x12, x11, #3
+; CHECK-NEXT: mov x14, x4
+; CHECK-NEXT: ldr x13, [x1, x12]
+; CHECK-NEXT: ldr x12, [x10, x12]
+; CHECK-NEXT: ldr w13, [x13]
+; CHECK-NEXT: .LBB2_6: // %for.body8
+; CHECK-NEXT: // Parent Loop BB2_4 Depth=1
+; CHECK-NEXT: // Parent Loop BB2_5 Depth=2
+; CHECK-NEXT: // => This Inner Loop Header: Depth=3
+; CHECK-NEXT: ldr x15, [x12], #8
+; CHECK-NEXT: ldr w15, [x15]
+; CHECK-NEXT: cmp w15, w13
+; CHECK-NEXT: cinc x0, x0, ne
+; CHECK-NEXT: subs x14, x14, #1
+; CHECK-NEXT: b.ne .LBB2_6
+; CHECK-NEXT: // %bb.7: // %for.cond5.for.cond
+; CHECK-NEXT: // in Loop: Header=BB2_5 Depth=2
+; CHECK-NEXT: add x11, x11, #1
+; CHECK-NEXT: cmp x11, x3
+; CHECK-NEXT: b.ne .LBB2_5
+; CHECK-NEXT: // %bb.8: // %for.cond1.for.cond
+; CHECK-NEXT: // in Loop: Header=BB2_4 Depth=1
+; CHECK-NEXT: add x9, x9, #1
+; CHECK-NEXT: cmp x9, x2
+; CHECK-NEXT: b.ne .LBB2_4
+; CHECK-NEXT: .LBB2_9: // %for.cond.cleanup
+; CHECK-NEXT: ret
+entry:
+ %cmp33 = icmp eq i64 %N, 0
+ %cmp229 = icmp eq i64 %M, 0
+ %or.cond = or i1 %cmp33, %cmp229
+ %cmp626 = icmp eq i64 %K, 0
+ %or.cond48 = or i1 %or.cond, %cmp626
+ br i1 %or.cond48, label %for.cond.cleanup, label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.cond1.for.cond
+ %i.035 = phi i64 [ %inc16, %for.cond1.for.cond ], [ 0, %entry ]
+ %sum.034 = phi i64 [ %spec.select, %for.cond1.for.cond ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds ptr, ptr %a, i64 %i.035
+ %0 = load ptr, ptr %arrayidx, align 8
+ br label %for.cond5.preheader
+
+for.cond5.preheader: ; preds = %for.cond5.for.cond, %for.cond1.preheader
+ %j.031 = phi i64 [ 0, %for.cond1.preheader ], [ %inc13, %for.cond5.for.cond ]
+ %sum.130 = phi i64 [ %sum.034, %for.cond1.preheader ], [ %spec.select, %for.cond5.for.cond ]
+ %arrayidx9 = getelementptr inbounds ptr, ptr %0, i64 %j.031
+ %1 = load ptr, ptr %arrayidx9, align 8
+ %arrayidx11 = getelementptr inbounds ptr, ptr %b, i64 %j.031
+ %2 = load ptr, ptr %arrayidx11, align 8
+ br label %for.body8
+
+for.body8: ; preds = %for.body8, %for.cond5.preheader
+ %k.028 = phi i64 [ 0, %for.cond5.preheader ], [ %inc, %for.body8 ]
+ %sum.227 = phi i64 [ %sum.130, %for.cond5.preheader ], [ %spec.select, %for.body8 ]
+ %arrayidx10 = getelementptr inbounds ptr, ptr %1, i64 %k.028
+ %3 = load ptr, ptr %arrayidx10, align 8
+ %bcmp = tail call i32 @bcmp(ptr %3, ptr %2, i64 4)
+ %tobool = icmp ne i32 %bcmp, 0
+ %add = zext i1 %tobool to i64
+ %spec.select = add i64 %sum.227, %add
+ %inc = add nuw i64 %k.028, 1
+ %exitcond = icmp eq i64 %inc, %K
+ br i1 %exitcond, label %for.cond5.for.cond, label %for.body8
+
+for.cond5.for.cond: ; preds = %for.body8
+ %inc13 = add nuw i64 %j.031, 1
+ %exitcond46 = icmp eq i64 %inc13, %M
+ br i1 %exitcond46, label %for.cond1.for.cond, label %for.cond5.preheader
+
+for.cond1.for.cond: ; preds = %for.cond5.for.cond
+ %inc16 = add nuw i64 %i.035, 1
+ %exitcond47 = icmp eq i64 %inc16, %N
+ br i1 %exitcond47, label %for.cond.cleanup, label %for.cond1.preheader
+
+for.cond.cleanup: ; preds = %for.cond1.for.cond, %entry
+ %sum.0.lcssa = phi i64 [ 0, %entry ], [ %spec.select, %for.cond1.for.cond ]
+ ret i64 %sum.0.lcssa
+}
+
+define i64 @three_dimensional(ptr %a, ptr %b, i64 %N, i64 %M, i64 %K) {
+; CHECK-LABEL: three_dimensional:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: cbz x2, .LBB3_9
+; CHECK-NEXT: // %bb.1: // %entry
+; CHECK-NEXT: cbz x3, .LBB3_9
+; CHECK-NEXT: // %bb.2: // %entry
+; CHECK-NEXT: cbz x4, .LBB3_9
+; CHECK-NEXT: // %bb.3: // %for.cond1.preheader.preheader
+; CHECK-NEXT: ldr w10, [x1]
+; CHECK-NEXT: mov x9, xzr
+; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: .LBB3_4: // %for.cond1.preheader
+; CHECK-NEXT: // =>This Loop Header: Depth=1
+; CHECK-NEXT: // Child Loop BB3_5 Depth 2
+; CHECK-NEXT: // Child Loop BB3_6 Depth 3
+; CHECK-NEXT: ldr x11, [x0, x9, lsl #3]
+; CHECK-NEXT: mov x12, xzr
+; CHECK-NEXT: .LBB3_5: // %for.cond5.preheader
+; CHECK-NEXT: // Parent Loop BB3_4 Depth=1
+; CHECK-NEXT: // => This Loop Header: Depth=2
+; CHECK-NEXT: // Child Loop BB3_6 Depth 3
+; CHECK-NEXT: ldr x13, [x11, x12, lsl #3]
+; CHECK-NEXT: mov x14, x4
+; CHECK-NEXT: .LBB3_6: // %for.body8
+; CHECK-NEXT: // Parent Loop BB3_4 Depth=1
+; CHECK-NEXT: // Parent Loop BB3_5 Depth=2
+; CHECK-NEXT: // => This Inner Loop Header: Depth=3
+; CHECK-NEXT: ldr x15, [x13], #8
+; CHECK-NEXT: ldr w15, [x15]
+; CHECK-NEXT: cmp w15, w10
+; CHECK-NEXT: cinc x8, x8, ne
+; CHECK-NEXT: subs x14, x14, #1
+; CHECK-NEXT: b.ne .LBB3_6
+; CHECK-NEXT: // %bb.7: // %for.cond5.for.cond
+; CHECK-NEXT: // in Loop: Header=BB3_5 Depth=2
+; CHECK-NEXT: add x12, x12, #1
+; CHECK-NEXT: cmp x12, x3
+; CHECK-NEXT: b.ne .LBB3_5
+; CHECK-NEXT: // %bb.8: // %for.cond1.for.cond
+; CHECK-NEXT: // in Loop: Header=BB3_4 Depth=1
+; CHECK-NEXT: add x9, x9, #1
+; CHECK-NEXT: cmp x9, x2
+; CHECK-NEXT: b.ne .LBB3_4
+; CHECK-NEXT: .LBB3_9: // %for.cond.cleanup
+; CHECK-NEXT: mov x0, x8
+; CHECK-NEXT: ret
+entry:
+ %cmp31 = icmp eq i64 %N, 0
+ %cmp227 = icmp eq i64 %M, 0
+ %or.cond = or i1 %cmp31, %cmp227
+ %cmp624 = icmp eq i64 %K, 0
+ %or.cond46 = or i1 %or.cond, %cmp624
+ br i1 %or.cond46, label %for.cond.cleanup, label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.cond1.for.cond
+ %i.033 = phi i64 [ %inc15, %for.cond1.for.cond ], [ 0, %entry ]
+ %sum.032 = phi i64 [ %spec.select, %for.cond1.for.cond ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds ptr, ptr %a, i64 %i.033
+ %0 = load ptr, ptr %arrayidx, align 8
+ br label %for.cond5.preheader
+
+for.cond5.preheader: ; preds = %for.cond5.for.cond, %for.cond1.preheader
+ %j.029 = phi i64 [ 0, %for.cond1.preheader ], [ %inc12, %for.cond5.for.cond ]
+ %sum.128 = phi i64 [ %sum.032, %for.cond1.preheader ], [ %spec.select, %for.cond5.for.cond ]
+ %arrayidx9 = getelementptr inbounds ptr, ptr %0, i64 %j.029
+ %1 = load ptr, ptr %arrayidx9, align 8
+ br label %for.body8
+
+for.body8: ; preds = %for.body8, %for.cond5.preheader
+ %k.026 = phi i64 [ 0, %for.cond5.preheader ], [ %inc, %for.body8 ]
+ %sum.225 = phi i64 [ %s...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/70796
More information about the llvm-commits
mailing list