[llvm] [MachineSink] Fix missing sinks along critical edges (PR #97618)
Min-Yih Hsu via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 8 09:22:50 PDT 2024
https://github.com/mshockwave updated https://github.com/llvm/llvm-project/pull/97618
>From 418d1857cdfaa3653cb149af4ec04c235a34577c Mon Sep 17 00:00:00 2001
From: Min Hsu <min.hsu at sifive.com>
Date: Wed, 3 Jul 2024 10:34:33 -0700
Subject: [PATCH 1/3] Pre-commit test
---
.../RISCV/machine-sink-load-immediate.ll | 187 ++++++++++++++++++
1 file changed, 187 insertions(+)
create mode 100644 llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll
diff --git a/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll b/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll
new file mode 100644
index 0000000000000..b85aeb8a05f58
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll
@@ -0,0 +1,187 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv64 < %s | FileCheck %s
+
+define i1 @sink_li(ptr %text, ptr %text.addr.0) {
+; CHECK-LABEL: sink_li:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: mv s1, a1
+; CHECK-NEXT: mv s0, a0
+; CHECK-NEXT: call toupper
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: beqz s0, .LBB0_25
+; CHECK-NEXT: # %bb.1: # %while.body.preheader
+; CHECK-NEXT: li a2, 1
+; CHECK-NEXT: li a3, 9
+; CHECK-NEXT: li a4, 32
+; CHECK-NEXT: .LBB0_2: # %while.body
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: bnez a2, .LBB0_4
+; CHECK-NEXT: # %bb.3: # %while.body
+; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT: bne a2, a3, .LBB0_16
+; CHECK-NEXT: .LBB0_4: # %while.body.1
+; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT: bnez a2, .LBB0_6
+; CHECK-NEXT: # %bb.5: # %while.body.1
+; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT: bne a2, a3, .LBB0_17
+; CHECK-NEXT: .LBB0_6: # %while.body.3
+; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT: bnez a2, .LBB0_8
+; CHECK-NEXT: # %bb.7: # %while.body.3
+; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT: bne a2, a4, .LBB0_19
+; CHECK-NEXT: .LBB0_8: # %while.body.4
+; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT: bnez a2, .LBB0_10
+; CHECK-NEXT: # %bb.9: # %while.body.4
+; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT: bne a2, a4, .LBB0_21
+; CHECK-NEXT: .LBB0_10: # %while.body.5
+; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT: bnez a2, .LBB0_12
+; CHECK-NEXT: # %bb.11: # %while.body.5
+; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT: bne a2, a3, .LBB0_23
+; CHECK-NEXT: .LBB0_12: # %while.body.6
+; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT: bnez a2, .LBB0_2
+; CHECK-NEXT: # %bb.13: # %while.body.6
+; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT: beq a2, a3, .LBB0_2
+; CHECK-NEXT: # %bb.14: # %while.body.6
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: beqz a2, .LBB0_25
+; CHECK-NEXT: # %bb.15: # %strdup.exit.split.loop.exit126
+; CHECK-NEXT: addi s0, s1, 7
+; CHECK-NEXT: j .LBB0_24
+; CHECK-NEXT: .LBB0_16: # %while.body
+; CHECK-NEXT: bnez a2, .LBB0_18
+; CHECK-NEXT: j .LBB0_25
+; CHECK-NEXT: .LBB0_17: # %while.body.1
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: beqz a2, .LBB0_25
+; CHECK-NEXT: .LBB0_18: # %strdup.exit.loopexit
+; CHECK-NEXT: li s0, 0
+; CHECK-NEXT: j .LBB0_24
+; CHECK-NEXT: .LBB0_19: # %while.body.3
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: beqz a2, .LBB0_25
+; CHECK-NEXT: # %bb.20: # %strdup.exit.split.loop.exit120
+; CHECK-NEXT: addi s0, s1, 4
+; CHECK-NEXT: j .LBB0_24
+; CHECK-NEXT: .LBB0_21: # %while.body.4
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: beqz a2, .LBB0_25
+; CHECK-NEXT: # %bb.22: # %strdup.exit.split.loop.exit122
+; CHECK-NEXT: addi s0, s1, 5
+; CHECK-NEXT: j .LBB0_24
+; CHECK-NEXT: .LBB0_23: # %while.body.5
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: beqz a2, .LBB0_25
+; CHECK-NEXT: .LBB0_24: # %strdup.exit
+; CHECK-NEXT: li s1, 0
+; CHECK-NEXT: mv s2, a0
+; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: mv a1, s0
+; CHECK-NEXT: jalr s1
+; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: mv a1, s2
+; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: jalr s1
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: .LBB0_25: # %return
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: ret
+entry:
+ %call = call i32 @toupper()
+ %tobool.not = icmp eq ptr %text, null
+ br i1 %tobool.not, label %return, label %while.body
+
+while.body: ; preds = %while.body.6, %while.body.6, %entry
+ switch i8 1, label %strdup.exit.split.loop.exit114 [
+ i8 1, label %while.body.1
+ i8 9, label %while.body.1
+ i8 0, label %return
+ ]
+
+while.body.1: ; preds = %while.body, %while.body
+ switch i8 1, label %strdup.exit [
+ i8 1, label %while.body.3
+ i8 9, label %while.body.3
+ i8 0, label %return
+ ]
+
+while.body.3: ; preds = %while.body.1, %while.body.1
+ switch i8 1, label %strdup.exit.split.loop.exit120 [
+ i8 32, label %while.body.4
+ i8 1, label %while.body.4
+ i8 0, label %return
+ ]
+
+while.body.4: ; preds = %while.body.3, %while.body.3
+ switch i8 1, label %strdup.exit.split.loop.exit122 [
+ i8 32, label %while.body.5
+ i8 1, label %while.body.5
+ i8 0, label %return
+ ]
+
+while.body.5: ; preds = %while.body.4, %while.body.4
+ switch i8 1, label %strdup.exit.split.loop.exit124 [
+ i8 1, label %while.body.6
+ i8 9, label %while.body.6
+ i8 0, label %return
+ ]
+
+while.body.6: ; preds = %while.body.5, %while.body.5
+ switch i8 1, label %strdup.exit.split.loop.exit126 [
+ i8 1, label %while.body
+ i8 9, label %while.body
+ i8 0, label %return
+ ]
+
+strdup.exit.split.loop.exit114: ; preds = %while.body
+ br label %strdup.exit
+
+strdup.exit.split.loop.exit120: ; preds = %while.body.3
+ %incdec.ptr.3.le = getelementptr i8, ptr %text.addr.0, i64 4
+ br label %strdup.exit
+
+strdup.exit.split.loop.exit122: ; preds = %while.body.4
+ %incdec.ptr.4.le = getelementptr i8, ptr %text.addr.0, i64 5
+ br label %strdup.exit
+
+strdup.exit.split.loop.exit124: ; preds = %while.body.5
+ br label %strdup.exit
+
+strdup.exit.split.loop.exit126: ; preds = %while.body.6
+ %incdec.ptr.6.le = getelementptr i8, ptr %text.addr.0, i64 7
+ br label %strdup.exit
+
+strdup.exit: ; preds = %strdup.exit.split.loop.exit126, %strdup.exit.split.loop.exit124, %strdup.exit.split.loop.exit122, %strdup.exit.split.loop.exit120, %strdup.exit.split.loop.exit114, %while.body.1
+ %text.addr.0.lcssa = phi ptr [ null, %strdup.exit.split.loop.exit114 ], [ %incdec.ptr.3.le, %strdup.exit.split.loop.exit120 ], [ %incdec.ptr.4.le, %strdup.exit.split.loop.exit122 ], [ %text, %strdup.exit.split.loop.exit124 ], [ %incdec.ptr.6.le, %strdup.exit.split.loop.exit126 ], [ null, %while.body.1 ]
+ %call5.i = tail call ptr null(ptr null, ptr %text.addr.0.lcssa)
+ %memchr64 = tail call ptr null(ptr null, i32 %call, i64 0)
+ br label %return
+
+return: ; preds = %strdup.exit, %while.body.6, %while.body.5, %while.body.4, %while.body.3, %while.body.1, %while.body, %entry
+ %retval.1 = phi i1 [ false, %entry ], [ true, %strdup.exit ], [ false, %while.body ], [ false, %while.body.1 ], [ false, %while.body.3 ], [ false, %while.body.4 ], [ false, %while.body.5 ], [ false, %while.body.6 ]
+ ret i1 %retval.1
+}
+
+declare i32 @toupper()
>From 5208a8ae94625c3e3e64db7d9a092488ecf5890f Mon Sep 17 00:00:00 2001
From: Min Hsu <min.hsu at sifive.com>
Date: Wed, 3 Jul 2024 10:39:41 -0700
Subject: [PATCH 2/3] [MachineSink] Fix missing sink along critical edges
4e0bd3f improved early MachineLICM's capabilities to hoist COPY from
physical registers out of a loop. However, it accidentally broke one of
MachineSink's preconditions on sinking cheap instructions (in this case,
COPY), which considers those instructions profitable to sink only
when there are at least two of them in the same def-use chain in the
same basic block. So if early MachineLICM hoists one of them out,
MachineSink no longer sinks the rest of the cheap instructions. This results
in redundant load immediate instructions in the motivating example we've
seen on RISC-V.
This patch fixes this by teaching MachineSink that if there is more than
one demand to sink a register into the same block from different
critical edges, it should be considered profitable as it increases the
CSE opportunities.
---
llvm/lib/CodeGen/MachineSink.cpp | 81 +++++++++++++++----
llvm/test/CodeGen/AArch64/and-sink.ll | 13 ++-
.../AArch64/fast-isel-branch-cond-split.ll | 24 ++----
.../RISCV/machine-sink-load-immediate.ll | 34 ++++----
4 files changed, 95 insertions(+), 57 deletions(-)
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 4dabaabe3659f..6c8b4a6b7338b 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -130,6 +130,14 @@ namespace {
// Remember which edges have been considered for breaking.
SmallSet<std::pair<MachineBasicBlock*, MachineBasicBlock*>, 8>
CEBCandidates;
+ // Memorize the register that also wanted to sink into the same block along
+ // a different critical edge.
+ // {register to sink, sink-to block} -> the first sink-from block.
+ // We're recording the first sink-from block because that (critical) edge
+ // was deferred until we see another register that's going to sink into the
+ // same block.
+ DenseMap<std::pair<Register, MachineBasicBlock *>, MachineBasicBlock *>
+ CEMergeCandidates;
// Remember which edges we are about to split.
// This is different from CEBCandidates since those edges
// will be split.
@@ -197,14 +205,17 @@ namespace {
void releaseMemory() override {
CEBCandidates.clear();
+ CEMergeCandidates.clear();
}
private:
bool ProcessBlock(MachineBasicBlock &MBB);
void ProcessDbgInst(MachineInstr &MI);
- bool isWorthBreakingCriticalEdge(MachineInstr &MI,
- MachineBasicBlock *From,
- MachineBasicBlock *To);
+ bool isLegalBreakingCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
+ MachineBasicBlock *To, bool BreakPHIEdge);
+ bool isWorthBreakingCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
+ MachineBasicBlock *To,
+ MachineBasicBlock *&DeferredFromBlock);
bool hasStoreBetween(MachineBasicBlock *From, MachineBasicBlock *To,
MachineInstr &MI);
@@ -725,6 +736,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
// Process all basic blocks.
CEBCandidates.clear();
+ CEMergeCandidates.clear();
ToSplit.clear();
for (auto &MBB: MF)
MadeChange |= ProcessBlock(MBB);
@@ -873,9 +885,9 @@ void MachineSinking::ProcessDbgInst(MachineInstr &MI) {
SeenDbgVars.insert(Var);
}
-bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
- MachineBasicBlock *From,
- MachineBasicBlock *To) {
+bool MachineSinking::isWorthBreakingCriticalEdge(
+ MachineInstr &MI, MachineBasicBlock *From, MachineBasicBlock *To,
+ MachineBasicBlock *&DeferredFromBlock) {
// FIXME: Need much better heuristics.
// If the pass has already considered breaking this edge (during this pass
@@ -887,6 +899,27 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI))
return true;
+ // Check and record the register and the destination block we want to sink
+ // into. Note that we want to do the following before the next check on branch
+ // probability. Because we want to record the initial candidate even if it's
+ // on hot edge, so that other candidates that might not on hot edges can be
+ // sinked as well.
+ for (const auto &MO : MI.all_defs()) {
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ Register SrcReg = Reg.isVirtual() ? TRI->lookThruCopyLike(Reg, MRI) : Reg;
+ auto Key = std::make_pair(SrcReg, To);
+ auto Res = CEMergeCandidates.insert(std::make_pair(Key, From));
+ // We wanted to sink the same register into the same block, consider it to
+ // be profitable.
+ if (!Res.second) {
+ // Return the source block that was previously holded off.
+ DeferredFromBlock = Res.first->second;
+ return true;
+ }
+ }
+
if (From->isSuccessor(To) && MBPI->getEdgeProbability(From, To) <=
BranchProbability(SplitEdgeProbabilityThreshold, 100))
return true;
@@ -921,13 +954,10 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
return false;
}
-bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
- MachineBasicBlock *FromBB,
- MachineBasicBlock *ToBB,
- bool BreakPHIEdge) {
- if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB))
- return false;
-
+bool MachineSinking::isLegalBreakingCriticalEdge(MachineInstr &MI,
+ MachineBasicBlock *FromBB,
+ MachineBasicBlock *ToBB,
+ bool BreakPHIEdge) {
// Avoid breaking back edge. From == To means backedge for single BB cycle.
if (!SplitEdges || FromBB == ToBB)
return false;
@@ -985,11 +1015,32 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
return false;
}
- ToSplit.insert(std::make_pair(FromBB, ToBB));
-
return true;
}
+bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
+ MachineBasicBlock *FromBB,
+ MachineBasicBlock *ToBB,
+ bool BreakPHIEdge) {
+ bool Status = false;
+ MachineBasicBlock *DeferredFromBB = nullptr;
+ if (isWorthBreakingCriticalEdge(MI, FromBB, ToBB, DeferredFromBB)) {
+ // If there is a DeferredFromBB, we consider FromBB only if _both_
+ // of them are legal to split.
+ if ((!DeferredFromBB ||
+ ToSplit.count(std::make_pair(DeferredFromBB, ToBB)) ||
+ isLegalBreakingCriticalEdge(MI, DeferredFromBB, ToBB, BreakPHIEdge)) &&
+ isLegalBreakingCriticalEdge(MI, FromBB, ToBB, BreakPHIEdge)) {
+ ToSplit.insert(std::make_pair(FromBB, ToBB));
+ if (DeferredFromBB)
+ ToSplit.insert(std::make_pair(DeferredFromBB, ToBB));
+ Status = true;
+ }
+ }
+
+ return Status;
+}
+
std::vector<unsigned> &
MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) {
// Currently to save compiling time, MBB's register pressure will not change
diff --git a/llvm/test/CodeGen/AArch64/and-sink.ll b/llvm/test/CodeGen/AArch64/and-sink.ll
index f298a55dab721..c84310629e5fd 100644
--- a/llvm/test/CodeGen/AArch64/and-sink.ll
+++ b/llvm/test/CodeGen/AArch64/and-sink.ll
@@ -46,9 +46,8 @@ bb2:
define dso_local i32 @and_sink2(i32 %a, i1 %c, i1 %c2) {
; CHECK-LABEL: and_sink2:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: adrp x9, A
-; CHECK-NEXT: str wzr, [x9, :lo12:A]
+; CHECK-NEXT: adrp x8, A
+; CHECK-NEXT: str wzr, [x8, :lo12:A]
; CHECK-NEXT: tbz w1, #0, .LBB1_5
; CHECK-NEXT: // %bb.1: // %bb0.preheader
; CHECK-NEXT: adrp x8, B
@@ -56,17 +55,15 @@ define dso_local i32 @and_sink2(i32 %a, i1 %c, i1 %c2) {
; CHECK-NEXT: .LBB1_2: // %bb0
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: str wzr, [x8, :lo12:B]
-; CHECK-NEXT: tbz w2, #0, .LBB1_6
+; CHECK-NEXT: tbz w2, #0, .LBB1_5
; CHECK-NEXT: // %bb.3: // %bb1
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: str wzr, [x9, :lo12:C]
; CHECK-NEXT: tbnz w0, #2, .LBB1_2
; CHECK-NEXT: // %bb.4:
-; CHECK-NEXT: mov w8, #1 // =0x1
-; CHECK-NEXT: .LBB1_5: // %common.ret
-; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB1_6:
+; CHECK-NEXT: .LBB1_5:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll b/llvm/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll
index d92bbfd7a21d6..49e31447c1c0d 100644
--- a/llvm/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll
@@ -4,13 +4,11 @@
define i64 @test_or(i32 %a, i32 %b) {
; CHECK-LABEL: test_or:
; CHECK: ; %bb.0: ; %bb1
-; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: cbnz w0, LBB0_2
+; CHECK-NEXT: LBB0_1:
; CHECK-NEXT: mov x0, xzr
-; CHECK-NEXT: cbnz w8, LBB0_2
-; CHECK-NEXT: LBB0_1: ; %common.ret
; CHECK-NEXT: ret
; CHECK-NEXT: LBB0_2: ; %bb1.cond.split
-; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: cbz w1, LBB0_1
; CHECK-NEXT: ; %bb.3: ; %bb4
; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
@@ -37,13 +35,11 @@ bb4:
define i64 @test_or_select(i32 %a, i32 %b) {
; CHECK-LABEL: test_or_select:
; CHECK: ; %bb.0: ; %bb1
-; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: cbnz w0, LBB1_2
+; CHECK-NEXT: LBB1_1:
; CHECK-NEXT: mov x0, xzr
-; CHECK-NEXT: cbnz w8, LBB1_2
-; CHECK-NEXT: LBB1_1: ; %common.ret
; CHECK-NEXT: ret
; CHECK-NEXT: LBB1_2: ; %bb1.cond.split
-; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: cbz w1, LBB1_1
; CHECK-NEXT: ; %bb.3: ; %bb4
; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
@@ -70,13 +66,11 @@ bb4:
define i64 @test_and(i32 %a, i32 %b) {
; CHECK-LABEL: test_and:
; CHECK: ; %bb.0: ; %bb1
-; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: cbnz w0, LBB2_2
+; CHECK-NEXT: LBB2_1:
; CHECK-NEXT: mov x0, xzr
-; CHECK-NEXT: cbnz w8, LBB2_2
-; CHECK-NEXT: LBB2_1: ; %common.ret
; CHECK-NEXT: ret
; CHECK-NEXT: LBB2_2: ; %bb1.cond.split
-; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: cbz w1, LBB2_1
; CHECK-NEXT: ; %bb.3: ; %bb4
; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
@@ -103,13 +97,11 @@ bb4:
define i64 @test_and_select(i32 %a, i32 %b) {
; CHECK-LABEL: test_and_select:
; CHECK: ; %bb.0: ; %bb1
-; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: cbnz w0, LBB3_2
+; CHECK-NEXT: LBB3_1:
; CHECK-NEXT: mov x0, xzr
-; CHECK-NEXT: cbnz w8, LBB3_2
-; CHECK-NEXT: LBB3_1: ; %common.ret
; CHECK-NEXT: ret
; CHECK-NEXT: LBB3_2: ; %bb1.cond.split
-; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: cbz w1, LBB3_1
; CHECK-NEXT: ; %bb.3: ; %bb4
; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
diff --git a/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll b/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll
index b85aeb8a05f58..00d10b9f2bc0c 100644
--- a/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll
+++ b/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll
@@ -18,7 +18,7 @@ define i1 @sink_li(ptr %text, ptr %text.addr.0) {
; CHECK-NEXT: mv s0, a0
; CHECK-NEXT: call toupper
; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: beqz s0, .LBB0_25
+; CHECK-NEXT: beqz s0, .LBB0_26
; CHECK-NEXT: # %bb.1: # %while.body.preheader
; CHECK-NEXT: li a2, 1
; CHECK-NEXT: li a3, 9
@@ -60,36 +60,34 @@ define i1 @sink_li(ptr %text, ptr %text.addr.0) {
; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: beq a2, a3, .LBB0_2
; CHECK-NEXT: # %bb.14: # %while.body.6
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: beqz a2, .LBB0_25
+; CHECK-NEXT: beqz a2, .LBB0_24
; CHECK-NEXT: # %bb.15: # %strdup.exit.split.loop.exit126
; CHECK-NEXT: addi s0, s1, 7
-; CHECK-NEXT: j .LBB0_24
-; CHECK-NEXT: .LBB0_16: # %while.body
-; CHECK-NEXT: bnez a2, .LBB0_18
; CHECK-NEXT: j .LBB0_25
+; CHECK-NEXT: .LBB0_16: # %while.body
+; CHECK-NEXT: beqz a2, .LBB0_26
+; CHECK-NEXT: j .LBB0_18
; CHECK-NEXT: .LBB0_17: # %while.body.1
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: beqz a2, .LBB0_25
+; CHECK-NEXT: beqz a2, .LBB0_24
; CHECK-NEXT: .LBB0_18: # %strdup.exit.loopexit
; CHECK-NEXT: li s0, 0
-; CHECK-NEXT: j .LBB0_24
+; CHECK-NEXT: j .LBB0_25
; CHECK-NEXT: .LBB0_19: # %while.body.3
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: beqz a2, .LBB0_25
+; CHECK-NEXT: beqz a2, .LBB0_24
; CHECK-NEXT: # %bb.20: # %strdup.exit.split.loop.exit120
; CHECK-NEXT: addi s0, s1, 4
-; CHECK-NEXT: j .LBB0_24
+; CHECK-NEXT: j .LBB0_25
; CHECK-NEXT: .LBB0_21: # %while.body.4
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: beqz a2, .LBB0_25
+; CHECK-NEXT: beqz a2, .LBB0_24
; CHECK-NEXT: # %bb.22: # %strdup.exit.split.loop.exit122
; CHECK-NEXT: addi s0, s1, 5
-; CHECK-NEXT: j .LBB0_24
+; CHECK-NEXT: j .LBB0_25
; CHECK-NEXT: .LBB0_23: # %while.body.5
+; CHECK-NEXT: bnez a2, .LBB0_25
+; CHECK-NEXT: .LBB0_24:
; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: beqz a2, .LBB0_25
-; CHECK-NEXT: .LBB0_24: # %strdup.exit
+; CHECK-NEXT: j .LBB0_26
+; CHECK-NEXT: .LBB0_25: # %strdup.exit
; CHECK-NEXT: li s1, 0
; CHECK-NEXT: mv s2, a0
; CHECK-NEXT: li a0, 0
@@ -100,7 +98,7 @@ define i1 @sink_li(ptr %text, ptr %text.addr.0) {
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: jalr s1
; CHECK-NEXT: li a1, 1
-; CHECK-NEXT: .LBB0_25: # %return
+; CHECK-NEXT: .LBB0_26: # %return
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
>From b5683f3465afdaf174b18c8dcbab6937fb3d0487 Mon Sep 17 00:00:00 2001
From: Min Hsu <min.hsu at sifive.com>
Date: Mon, 8 Jul 2024 09:22:28 -0700
Subject: [PATCH 3/3] Address review comments
---
llvm/lib/CodeGen/MachineSink.cpp | 20 +++++++++----------
.../RISCV/machine-sink-load-immediate.ll | 7 +------
2 files changed, 11 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 6c8b4a6b7338b..43323c6a11f3b 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -211,8 +211,8 @@ namespace {
private:
bool ProcessBlock(MachineBasicBlock &MBB);
void ProcessDbgInst(MachineInstr &MI);
- bool isLegalBreakingCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
- MachineBasicBlock *To, bool BreakPHIEdge);
+ bool isLegalToBreakCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
+ MachineBasicBlock *To, bool BreakPHIEdge);
bool isWorthBreakingCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
MachineBasicBlock *To,
MachineBasicBlock *&DeferredFromBlock);
@@ -910,11 +910,11 @@ bool MachineSinking::isWorthBreakingCriticalEdge(
continue;
Register SrcReg = Reg.isVirtual() ? TRI->lookThruCopyLike(Reg, MRI) : Reg;
auto Key = std::make_pair(SrcReg, To);
- auto Res = CEMergeCandidates.insert(std::make_pair(Key, From));
+ auto Res = CEMergeCandidates.try_emplace(Key, From);
// We wanted to sink the same register into the same block, consider it to
// be profitable.
if (!Res.second) {
- // Return the source block that was previously holded off.
+ // Return the source block that was previously held off.
DeferredFromBlock = Res.first->second;
return true;
}
@@ -954,10 +954,10 @@ bool MachineSinking::isWorthBreakingCriticalEdge(
return false;
}
-bool MachineSinking::isLegalBreakingCriticalEdge(MachineInstr &MI,
- MachineBasicBlock *FromBB,
- MachineBasicBlock *ToBB,
- bool BreakPHIEdge) {
+bool MachineSinking::isLegalToBreakCriticalEdge(MachineInstr &MI,
+ MachineBasicBlock *FromBB,
+ MachineBasicBlock *ToBB,
+ bool BreakPHIEdge) {
// Avoid breaking back edge. From == To means backedge for single BB cycle.
if (!SplitEdges || FromBB == ToBB)
return false;
@@ -1029,8 +1029,8 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
// of them are legal to split.
if ((!DeferredFromBB ||
ToSplit.count(std::make_pair(DeferredFromBB, ToBB)) ||
- isLegalBreakingCriticalEdge(MI, DeferredFromBB, ToBB, BreakPHIEdge)) &&
- isLegalBreakingCriticalEdge(MI, FromBB, ToBB, BreakPHIEdge)) {
+ isLegalToBreakCriticalEdge(MI, DeferredFromBB, ToBB, BreakPHIEdge)) &&
+ isLegalToBreakCriticalEdge(MI, FromBB, ToBB, BreakPHIEdge)) {
ToSplit.insert(std::make_pair(FromBB, ToBB));
if (DeferredFromBB)
ToSplit.insert(std::make_pair(DeferredFromBB, ToBB));
diff --git a/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll b/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll
index 00d10b9f2bc0c..6714991d091cf 100644
--- a/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll
+++ b/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll
@@ -1,19 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=riscv64 < %s | FileCheck %s
-define i1 @sink_li(ptr %text, ptr %text.addr.0) {
+define i1 @sink_li(ptr %text, ptr %text.addr.0) nounwind {
; CHECK-LABEL: sink_li:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -32
-; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset ra, -8
-; CHECK-NEXT: .cfi_offset s0, -16
-; CHECK-NEXT: .cfi_offset s1, -24
-; CHECK-NEXT: .cfi_offset s2, -32
; CHECK-NEXT: mv s1, a1
; CHECK-NEXT: mv s0, a0
; CHECK-NEXT: call toupper
More information about the llvm-commits
mailing list