[llvm] 7e2f961 - [MachineSink] Fix missing sinks along critical edges (#97618)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 9 10:48:26 PDT 2024


Author: Min-Yih Hsu
Date: 2024-07-09T10:48:22-07:00
New Revision: 7e2f96194fa56148281c0232b4fa0db5c9cabec3

URL: https://github.com/llvm/llvm-project/commit/7e2f96194fa56148281c0232b4fa0db5c9cabec3
DIFF: https://github.com/llvm/llvm-project/commit/7e2f96194fa56148281c0232b4fa0db5c9cabec3.diff

LOG: [MachineSink] Fix missing sinks along critical edges (#97618)

4e0bd3f improved early MachineLICM's capabilities to hoist COPY from
physical registers out of a loop. However, it accidentally broke one of
MachineSink's preconditions on sinking cheap instructions (in this case,
COPY) which considered those instructions being profitable to sink only
when there are at least two of them in the same def-use chain in the
same basic block. So if early MachineLICM hoisted one of them out,
MachineSink no longer sink rest of the cheap instructions. This results
in redundant load immediate instructions from the motivating example
we've seen on RISC-V.

This patch fixes this by teaching MachineSink that if there is more than
one demand to sink a register into the same block from different
critical edges, it should be considered profitable as it increases the
CSE opportunities.
This change also improves two of the AArch64's cases.

Added: 
    

Modified: 
    llvm/lib/CodeGen/MachineSink.cpp
    llvm/test/CodeGen/AArch64/and-sink.ll
    llvm/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll
    llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 83c2895f91fbb..bbc5ab13a0cd3 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -130,6 +130,14 @@ namespace {
     // Remember which edges have been considered for breaking.
     SmallSet<std::pair<MachineBasicBlock*, MachineBasicBlock*>, 8>
     CEBCandidates;
+    // Memorize the register that also wanted to sink into the same block along
+    // a different critical edge.
+    // {register to sink, sink-to block} -> the first sink-from block.
+    // We're recording the first sink-from block because that (critical) edge
+    // was deferred until we see another register that's going to sink into the
+    // same block.
+    DenseMap<std::pair<Register, MachineBasicBlock *>, MachineBasicBlock *>
+        CEMergeCandidates;
     // Remember which edges we are about to split.
     // This is different from CEBCandidates since those edges
     // will be split.
@@ -197,14 +205,17 @@ namespace {
 
     void releaseMemory() override {
       CEBCandidates.clear();
+      CEMergeCandidates.clear();
     }
 
   private:
     bool ProcessBlock(MachineBasicBlock &MBB);
     void ProcessDbgInst(MachineInstr &MI);
-    bool isWorthBreakingCriticalEdge(MachineInstr &MI,
-                                     MachineBasicBlock *From,
-                                     MachineBasicBlock *To);
+    bool isLegalToBreakCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
+                                    MachineBasicBlock *To, bool BreakPHIEdge);
+    bool isWorthBreakingCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
+                                     MachineBasicBlock *To,
+                                     MachineBasicBlock *&DeferredFromBlock);
 
     bool hasStoreBetween(MachineBasicBlock *From, MachineBasicBlock *To,
                          MachineInstr &MI);
@@ -725,6 +736,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
 
     // Process all basic blocks.
     CEBCandidates.clear();
+    CEMergeCandidates.clear();
     ToSplit.clear();
     for (auto &MBB: MF)
       MadeChange |= ProcessBlock(MBB);
@@ -873,9 +885,9 @@ void MachineSinking::ProcessDbgInst(MachineInstr &MI) {
   SeenDbgVars.insert(Var);
 }
 
-bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
-                                                 MachineBasicBlock *From,
-                                                 MachineBasicBlock *To) {
+bool MachineSinking::isWorthBreakingCriticalEdge(
+    MachineInstr &MI, MachineBasicBlock *From, MachineBasicBlock *To,
+    MachineBasicBlock *&DeferredFromBlock) {
   // FIXME: Need much better heuristics.
 
   // If the pass has already considered breaking this edge (during this pass
@@ -887,6 +899,27 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
   if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI))
     return true;
 
+  // Check and record the register and the destination block we want to sink
+  // into. Note that we want to do the following before the next check on branch
+  // probability. Because we want to record the initial candidate even if it's
+  // on hot edge, so that other candidates that might not on hot edges can be
+  // sinked as well.
+  for (const auto &MO : MI.all_defs()) {
+    Register Reg = MO.getReg();
+    if (!Reg)
+      continue;
+    Register SrcReg = Reg.isVirtual() ? TRI->lookThruCopyLike(Reg, MRI) : Reg;
+    auto Key = std::make_pair(SrcReg, To);
+    auto Res = CEMergeCandidates.try_emplace(Key, From);
+    // We wanted to sink the same register into the same block, consider it to
+    // be profitable.
+    if (!Res.second) {
+      // Return the source block that was previously held off.
+      DeferredFromBlock = Res.first->second;
+      return true;
+    }
+  }
+
   if (From->isSuccessor(To) && MBPI->getEdgeProbability(From, To) <=
       BranchProbability(SplitEdgeProbabilityThreshold, 100))
     return true;
@@ -921,13 +954,10 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
   return false;
 }
 
-bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
-                                               MachineBasicBlock *FromBB,
-                                               MachineBasicBlock *ToBB,
-                                               bool BreakPHIEdge) {
-  if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB))
-    return false;
-
+bool MachineSinking::isLegalToBreakCriticalEdge(MachineInstr &MI,
+                                                MachineBasicBlock *FromBB,
+                                                MachineBasicBlock *ToBB,
+                                                bool BreakPHIEdge) {
   // Avoid breaking back edge. From == To means backedge for single BB cycle.
   if (!SplitEdges || FromBB == ToBB)
     return false;
@@ -985,11 +1015,32 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
         return false;
   }
 
-  ToSplit.insert(std::make_pair(FromBB, ToBB));
-
   return true;
 }
 
+bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
+                                               MachineBasicBlock *FromBB,
+                                               MachineBasicBlock *ToBB,
+                                               bool BreakPHIEdge) {
+  bool Status = false;
+  MachineBasicBlock *DeferredFromBB = nullptr;
+  if (isWorthBreakingCriticalEdge(MI, FromBB, ToBB, DeferredFromBB)) {
+    // If there is a DeferredFromBB, we consider FromBB only if _both_
+    // of them are legal to split.
+    if ((!DeferredFromBB ||
+         ToSplit.count(std::make_pair(DeferredFromBB, ToBB)) ||
+         isLegalToBreakCriticalEdge(MI, DeferredFromBB, ToBB, BreakPHIEdge)) &&
+        isLegalToBreakCriticalEdge(MI, FromBB, ToBB, BreakPHIEdge)) {
+      ToSplit.insert(std::make_pair(FromBB, ToBB));
+      if (DeferredFromBB)
+        ToSplit.insert(std::make_pair(DeferredFromBB, ToBB));
+      Status = true;
+    }
+  }
+
+  return Status;
+}
+
 std::vector<unsigned> &
 MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) {
   // Currently to save compiling time, MBB's register pressure will not change

diff --git a/llvm/test/CodeGen/AArch64/and-sink.ll b/llvm/test/CodeGen/AArch64/and-sink.ll
index f298a55dab721..c84310629e5fd 100644
--- a/llvm/test/CodeGen/AArch64/and-sink.ll
+++ b/llvm/test/CodeGen/AArch64/and-sink.ll
@@ -46,9 +46,8 @@ bb2:
 define dso_local i32 @and_sink2(i32 %a, i1 %c, i1 %c2) {
 ; CHECK-LABEL: and_sink2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, wzr
-; CHECK-NEXT:    adrp x9, A
-; CHECK-NEXT:    str wzr, [x9, :lo12:A]
+; CHECK-NEXT:    adrp x8, A
+; CHECK-NEXT:    str wzr, [x8, :lo12:A]
 ; CHECK-NEXT:    tbz w1, #0, .LBB1_5
 ; CHECK-NEXT:  // %bb.1: // %bb0.preheader
 ; CHECK-NEXT:    adrp x8, B
@@ -56,17 +55,15 @@ define dso_local i32 @and_sink2(i32 %a, i1 %c, i1 %c2) {
 ; CHECK-NEXT:  .LBB1_2: // %bb0
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    str wzr, [x8, :lo12:B]
-; CHECK-NEXT:    tbz w2, #0, .LBB1_6
+; CHECK-NEXT:    tbz w2, #0, .LBB1_5
 ; CHECK-NEXT:  // %bb.3: // %bb1
 ; CHECK-NEXT:    // in Loop: Header=BB1_2 Depth=1
 ; CHECK-NEXT:    str wzr, [x9, :lo12:C]
 ; CHECK-NEXT:    tbnz w0, #2, .LBB1_2
 ; CHECK-NEXT:  // %bb.4:
-; CHECK-NEXT:    mov w8, #1 // =0x1
-; CHECK-NEXT:  .LBB1_5: // %common.ret
-; CHECK-NEXT:    mov w0, w8
+; CHECK-NEXT:    mov w0, #1 // =0x1
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB1_6:
+; CHECK-NEXT:  .LBB1_5:
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
 

diff --git a/llvm/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll b/llvm/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll
index d92bbfd7a21d6..49e31447c1c0d 100644
--- a/llvm/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll
@@ -4,13 +4,11 @@
 define i64 @test_or(i32 %a, i32 %b) {
 ; CHECK-LABEL: test_or:
 ; CHECK:       ; %bb.0: ; %bb1
-; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    cbnz w0, LBB0_2
+; CHECK-NEXT:  LBB0_1:
 ; CHECK-NEXT:    mov x0, xzr
-; CHECK-NEXT:    cbnz w8, LBB0_2
-; CHECK-NEXT:  LBB0_1: ; %common.ret
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  LBB0_2: ; %bb1.cond.split
-; CHECK-NEXT:    mov x0, xzr
 ; CHECK-NEXT:    cbz w1, LBB0_1
 ; CHECK-NEXT:  ; %bb.3: ; %bb4
 ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
@@ -37,13 +35,11 @@ bb4:
 define i64 @test_or_select(i32 %a, i32 %b) {
 ; CHECK-LABEL: test_or_select:
 ; CHECK:       ; %bb.0: ; %bb1
-; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    cbnz w0, LBB1_2
+; CHECK-NEXT:  LBB1_1:
 ; CHECK-NEXT:    mov x0, xzr
-; CHECK-NEXT:    cbnz w8, LBB1_2
-; CHECK-NEXT:  LBB1_1: ; %common.ret
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  LBB1_2: ; %bb1.cond.split
-; CHECK-NEXT:    mov x0, xzr
 ; CHECK-NEXT:    cbz w1, LBB1_1
 ; CHECK-NEXT:  ; %bb.3: ; %bb4
 ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
@@ -70,13 +66,11 @@ bb4:
 define i64 @test_and(i32 %a, i32 %b) {
 ; CHECK-LABEL: test_and:
 ; CHECK:       ; %bb.0: ; %bb1
-; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    cbnz w0, LBB2_2
+; CHECK-NEXT:  LBB2_1:
 ; CHECK-NEXT:    mov x0, xzr
-; CHECK-NEXT:    cbnz w8, LBB2_2
-; CHECK-NEXT:  LBB2_1: ; %common.ret
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  LBB2_2: ; %bb1.cond.split
-; CHECK-NEXT:    mov x0, xzr
 ; CHECK-NEXT:    cbz w1, LBB2_1
 ; CHECK-NEXT:  ; %bb.3: ; %bb4
 ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
@@ -103,13 +97,11 @@ bb4:
 define i64 @test_and_select(i32 %a, i32 %b) {
 ; CHECK-LABEL: test_and_select:
 ; CHECK:       ; %bb.0: ; %bb1
-; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    cbnz w0, LBB3_2
+; CHECK-NEXT:  LBB3_1:
 ; CHECK-NEXT:    mov x0, xzr
-; CHECK-NEXT:    cbnz w8, LBB3_2
-; CHECK-NEXT:  LBB3_1: ; %common.ret
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  LBB3_2: ; %bb1.cond.split
-; CHECK-NEXT:    mov x0, xzr
 ; CHECK-NEXT:    cbz w1, LBB3_1
 ; CHECK-NEXT:  ; %bb.3: ; %bb4
 ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill

diff --git a/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll b/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll
index 21690c439b2d9..6714991d091cf 100644
--- a/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll
+++ b/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll
@@ -13,7 +13,7 @@ define i1 @sink_li(ptr %text, ptr %text.addr.0) nounwind {
 ; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    call toupper
 ; CHECK-NEXT:    li a1, 0
-; CHECK-NEXT:    beqz s0, .LBB0_25
+; CHECK-NEXT:    beqz s0, .LBB0_26
 ; CHECK-NEXT:  # %bb.1: # %while.body.preheader
 ; CHECK-NEXT:    li a2, 1
 ; CHECK-NEXT:    li a3, 9
@@ -55,36 +55,34 @@ define i1 @sink_li(ptr %text, ptr %text.addr.0) nounwind {
 ; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=1
 ; CHECK-NEXT:    beq a2, a3, .LBB0_2
 ; CHECK-NEXT:  # %bb.14: # %while.body.6
-; CHECK-NEXT:    li a1, 0
-; CHECK-NEXT:    beqz a2, .LBB0_25
+; CHECK-NEXT:    beqz a2, .LBB0_24
 ; CHECK-NEXT:  # %bb.15: # %strdup.exit.split.loop.exit126
 ; CHECK-NEXT:    addi s0, s1, 7
-; CHECK-NEXT:    j .LBB0_24
-; CHECK-NEXT:  .LBB0_16: # %while.body
-; CHECK-NEXT:    bnez a2, .LBB0_18
 ; CHECK-NEXT:    j .LBB0_25
+; CHECK-NEXT:  .LBB0_16: # %while.body
+; CHECK-NEXT:    beqz a2, .LBB0_26
+; CHECK-NEXT:    j .LBB0_18
 ; CHECK-NEXT:  .LBB0_17: # %while.body.1
-; CHECK-NEXT:    li a1, 0
-; CHECK-NEXT:    beqz a2, .LBB0_25
+; CHECK-NEXT:    beqz a2, .LBB0_24
 ; CHECK-NEXT:  .LBB0_18: # %strdup.exit.loopexit
 ; CHECK-NEXT:    li s0, 0
-; CHECK-NEXT:    j .LBB0_24
+; CHECK-NEXT:    j .LBB0_25
 ; CHECK-NEXT:  .LBB0_19: # %while.body.3
-; CHECK-NEXT:    li a1, 0
-; CHECK-NEXT:    beqz a2, .LBB0_25
+; CHECK-NEXT:    beqz a2, .LBB0_24
 ; CHECK-NEXT:  # %bb.20: # %strdup.exit.split.loop.exit120
 ; CHECK-NEXT:    addi s0, s1, 4
-; CHECK-NEXT:    j .LBB0_24
+; CHECK-NEXT:    j .LBB0_25
 ; CHECK-NEXT:  .LBB0_21: # %while.body.4
-; CHECK-NEXT:    li a1, 0
-; CHECK-NEXT:    beqz a2, .LBB0_25
+; CHECK-NEXT:    beqz a2, .LBB0_24
 ; CHECK-NEXT:  # %bb.22: # %strdup.exit.split.loop.exit122
 ; CHECK-NEXT:    addi s0, s1, 5
-; CHECK-NEXT:    j .LBB0_24
+; CHECK-NEXT:    j .LBB0_25
 ; CHECK-NEXT:  .LBB0_23: # %while.body.5
+; CHECK-NEXT:    bnez a2, .LBB0_25
+; CHECK-NEXT:  .LBB0_24:
 ; CHECK-NEXT:    li a1, 0
-; CHECK-NEXT:    beqz a2, .LBB0_25
-; CHECK-NEXT:  .LBB0_24: # %strdup.exit
+; CHECK-NEXT:    j .LBB0_26
+; CHECK-NEXT:  .LBB0_25: # %strdup.exit
 ; CHECK-NEXT:    li s1, 0
 ; CHECK-NEXT:    mv s2, a0
 ; CHECK-NEXT:    li a0, 0
@@ -95,7 +93,7 @@ define i1 @sink_li(ptr %text, ptr %text.addr.0) nounwind {
 ; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    jalr s1
 ; CHECK-NEXT:    li a1, 1
-; CHECK-NEXT:  .LBB0_25: # %return
+; CHECK-NEXT:  .LBB0_26: # %return
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload


        


More information about the llvm-commits mailing list