[llvm] e223e45 - Reland "[AArch64][CodeGen] Avoid inverting hot branches during relaxation"

Daniel Hoekwater via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 21 10:30:03 PDT 2023


Author: Daniel Hoekwater
Date: 2023-08-21T17:29:47Z
New Revision: e223e4567722661f1b32aa052cd13d9f47b896d9

URL: https://github.com/llvm/llvm-project/commit/e223e4567722661f1b32aa052cd13d9f47b896d9
DIFF: https://github.com/llvm/llvm-project/commit/e223e4567722661f1b32aa052cd13d9f47b896d9.diff

LOG: Reland "[AArch64][CodeGen] Avoid inverting hot branches during relaxation"

This is a reland of 46d2d7599d9ed5e68fb53e910feb10d47ee2667b, which was
reverted because it appeared to break the build at
https://lab.llvm.org/buildbot/#/builders/21/builds/78779. However, that
buildbot is spuriously broken due to Flang::underscoring.f90 being
nondeterministic, so the failure is unrelated to this change.

Added: 
    

Modified: 
    llvm/lib/CodeGen/BranchRelaxation.cpp
    llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp
index 378f8bfda20361..2995732e0aa86b 100644
--- a/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -79,6 +79,10 @@ class BranchRelaxation : public MachineFunctionPass {
   };
 
   SmallVector<BasicBlockInfo, 16> BlockInfo;
+
+  // The basic block after which trampolines are inserted. This is the last
+  // basic block that isn't in the cold section.
+  MachineBasicBlock *TrampolineInsertionPoint = nullptr;
   std::unique_ptr<RegScavenger> RS;
   LivePhysRegs LiveRegs;
 
@@ -166,16 +170,27 @@ LLVM_DUMP_METHOD void BranchRelaxation::dumpBBs() {
 void BranchRelaxation::scanFunction() {
   BlockInfo.clear();
   BlockInfo.resize(MF->getNumBlockIDs());
+  TrampolineInsertionPoint = nullptr;
 
   // First thing, compute the size of all basic blocks, and see if the function
   // has any inline assembly in it. If so, we have to be conservative about
   // alignment assumptions, as we don't know for sure the size of any
-  // instructions in the inline assembly.
-  for (MachineBasicBlock &MBB : *MF)
+  // instructions in the inline assembly. At the same time, place the
+  // trampoline insertion point at the end of the hot portion of the function.
+  for (MachineBasicBlock &MBB : *MF) {
     BlockInfo[MBB.getNumber()].Size = computeBlockSize(MBB);
 
+    if (MBB.getSectionID() != MBBSectionID::ColdSectionID)
+      TrampolineInsertionPoint = &MBB;
+  }
+
   // Compute block offsets and known bits.
   adjustBlockOffsets(*MF->begin());
+
+  if (TrampolineInsertionPoint == nullptr) {
+    LLVM_DEBUG(dbgs() << "  No suitable trampoline insertion point found in "
+                      << MF->getName() << ".\n");
+  }
 }
 
 /// computeBlockSize - Compute the size for MBB.
@@ -376,6 +391,50 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) {
   assert(!Fail && "branches to be relaxed must be analyzable");
   (void)Fail;
 
+  // Since cross-section conditional branches to the cold section are rarely
+  // taken, try to avoid inverting the condition. Instead, add a "trampoline
+  // branch", which unconditionally branches to the branch destination. Place
+  // the trampoline branch after the last hot (non-cold) block and retarget
+  // the conditional branch to the trampoline.
+  // tbz L1
+  // =>
+  // tbz L1Trampoline
+  // ...
+  // L1Trampoline: b  L1
+  if (MBB->getSectionID() != TBB->getSectionID() &&
+      TBB->getSectionID() == MBBSectionID::ColdSectionID &&
+      TrampolineInsertionPoint != nullptr) {
+    // If the insertion point is out of range, we can't put a trampoline there.
+    NewBB =
+        createNewBlockAfter(*TrampolineInsertionPoint, MBB->getBasicBlock());
+
+    if (isBlockInRange(MI, *NewBB)) {
+      LLVM_DEBUG(dbgs() << "  Retarget destination to trampoline at "
+                        << NewBB->back());
+
+      insertUncondBranch(NewBB, TBB);
+
+      // Update the successor lists to include the trampoline.
+      MBB->replaceSuccessor(TBB, NewBB);
+      NewBB->addSuccessor(TBB);
+
+      // Replace branch in the current (MBB) block.
+      removeBranch(MBB);
+      insertBranch(MBB, NewBB, FBB, Cond);
+
+      TrampolineInsertionPoint = NewBB;
+      finalizeBlockChanges(MBB, NewBB);
+      return true;
+    }
+
+    LLVM_DEBUG(
+        dbgs() << "  Trampoline insertion point out of range for Bcc from "
+               << printMBBReference(*MBB) << " to " << printMBBReference(*TBB)
+               << ".\n");
+    TrampolineInsertionPoint->setIsEndSection(NewBB->isEndSection());
+    MF->erase(NewBB);
+  }
+
   // Add an unconditional branch to the destination and invert the branch
   // condition to jump over it:
   // tbz L1

diff  --git a/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir b/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir
index 1cf307cd16ecff..231bc886dd3b74 100644
--- a/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir
+++ b/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir
@@ -1,4 +1,5 @@
-# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass branch-relaxation -aarch64-b-offset-bits=64 %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass branch-relaxation -aarch64-b-offset-bits=64 -aarch64-tbz-offset-bits=9 -aarch64-cbz-offset-bits=9 %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass branch-relaxation -aarch64-tbz-offset-bits=9 -aarch64-cbz-offset-bits=9 %s -o - | FileCheck --check-prefix=INDIRECT %s
 
 --- |
   declare i32 @bar()
@@ -21,6 +22,73 @@
     br label %end
   }
 
+  define void @tbz_hot_to_cold(i1 zeroext %0) {
+    br i1 %0, label %hot_block, label %cold_block
+  
+  hot_block:                                        ; preds = %1
+    %2 = call i32 @baz()
+    br label %end
+  
+  end:                                              ; preds = %cold_block, %hot_block
+    %3 = tail call i32 @qux()
+    ret void
+  
+  cold_block:                                       ; preds = %1
+    %4 = call i32 @bar()
+    br label %end
+  }
+
+  define void @tbz_no_valid_tramp(i1 zeroext %0) {
+    br i1 %0, label %hot, label %cold
+  
+  hot:                                              ; preds = %1
+    %2 = call i32 @baz()
+    call void asm sideeffect ".space 1024", ""()
+    br label %end
+  
+  end:                                              ; preds = %cold, %hot
+    %3 = tail call i32 @qux()
+    ret void
+  
+  cold:                                             ; preds = %1
+    %4 = call i32 @bar()
+    br label %end
+  }
+
+  define void @tbz_cold_to_hot(i1 zeroext %0) #0 {
+    br i1 %0, label %cold_block, label %hot_block
+  
+  cold_block:                                       ; preds = %1
+    %2 = call i32 @baz()
+    br label %end
+  
+  end:                                              ; preds = %hot_block, %cold_block
+    %3 = tail call i32 @qux()
+    ret void
+  
+  hot_block:                                        ; preds = %1
+    %4 = call i32 @bar()
+    br label %end
+  }
+
+  define void @tbz_tramp_pushed_oob(i1 zeroext %0, i1 zeroext %1) {
+  entry:
+    %x16 = call i64 asm sideeffect "mov x16, 1", "={x16}"()
+    br i1 %0, label %unrelaxable, label %cold
+
+  unrelaxable:                                      ; preds = %entry
+    br i1 %1, label %end, label %cold
+
+  end:                                              ; preds = %unrelaxable
+    call void asm sideeffect ".space 996", ""()
+    call void asm sideeffect "# reg use $0", "{x16}"(i64 %x16)
+    ret void
+
+  cold:                                            ; preds = %entry, %unrelaxable
+    call void asm sideeffect "# reg use $0", "{x16}"(i64 %x16)
+    ret void
+  }
+
 ...
 ---
 name:            relax_tbz
@@ -69,3 +137,201 @@ body:             |
     early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0)
     TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
 ...
+---
+name:            tbz_hot_to_cold
+tracksRegLiveness: true
+liveins:
+  - { reg: '$w0', virtual-reg: '' }
+stack:
+  - { id: 0, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16,
+      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+body:             |
+  ; CHECK-LABEL: name: tbz_hot_to_cold
+  ; COM: Check that branch relaxation relaxes cross-section conditional
+  ; COM:   branches by creating trampolines after all other hot basic blocks.
+  ; CHECK: bb.0 (%ir-block.1):
+  ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
+  ; CHECK:  TBZW
+  ; CHECK-SAME: %bb.3
+  ; CHECK:  bb.1.hot_block:
+  ; CHECK:    TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
+  ; CHECK:  bb.3 (%ir-block.1):
+  ; CHECK-NEXT:    successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:    B %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:  bb.2.cold_block (bbsections Cold):
+  ; CHECK:    TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
+  bb.0 (%ir-block.1):
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $w0, $lr
+
+    early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0)
+    TBZW killed renamable $w0, 0, %bb.2
+
+  bb.1.hot_block:
+    BL @baz, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+    early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0)
+    TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
+
+  bb.2.cold_block (bbsections Cold):
+    BL @bar, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+    early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0)
+    TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
+
+...
+---
+name:            tbz_no_valid_tramp
+tracksRegLiveness: true
+liveins:
+  - { reg: '$w0', virtual-reg: '' }
+stack:
+  - { id: 0, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16, 
+      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+machineFunctionInfo:
+  hasRedZone:      false
+body:             |
+  ; CHECK-LABEL: name: tbz_no_valid_tramp
+  ; COM: Check that branch relaxation doesn't insert a trampoline if there is no
+  ; COM:   viable insertion location.
+  ; CHECK:    bb.0 (%ir-block.1):
+  ; CHECK-NEXT:    successors: %bb.1(0x40000000), %bb.3(0x40000000)
+  ; CHECK:    CBNZW
+  ; CHECK-SAME:    %bb.1
+  ; CHECK-NEXT: B
+  ; CHECK-SAME:   %bb.3
+  ; CHECK:  bb.1.hot:
+  ; CHECK:    TCRETURNdi
+  ; CHECK:  bb.2.cold (bbsections Cold):
+  ; CHECK:    TCRETURNdi
+  bb.0 (%ir-block.1):
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $w0, $lr
+  
+    early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0)
+    CBZW killed renamable $w0, %bb.2
+  
+  bb.1.hot:
+    BL @baz, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+    INLINEASM &".space 1024", 1 /* sideeffect attdialect */
+    early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0)
+    TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
+  
+  bb.2.cold (bbsections Cold):
+    BL @bar, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+    early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0)
+    TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
+
+...
+---
+name:            tbz_cold_to_hot
+tracksRegLiveness: true
+liveins:
+  - { reg: '$w0', virtual-reg: '' }
+stack:
+  - { id: 0, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16, 
+      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+machineFunctionInfo:
+  hasRedZone:      false
+body:             |
+  ; CHECK-LABEL: name: tbz_cold_to_hot
+  ; COM: Check that relaxation of conditional branches from the Cold section to
+  ; COM:   the Hot section doesn't modify the Hot section.
+  ; CHECK:  bb.0 (%ir-block.1, bbsections Cold):
+  ; CHECK-NEXT:    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:    CBNZW
+  ; CHECK-SAME:     %bb.1
+  ; CHECK-NEXT:    B %bb.2
+  ; CHECK:  bb.1.cold_block (bbsections Cold):
+  ; CHECK:    TCRETURNdi
+  ; CHECK:  bb.2.hot_block:
+  ; CHECK:    TCRETURNdi
+  bb.0 (%ir-block.1, bbsections Cold):
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $w0, $lr
+  
+    early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0)
+    CBZW killed renamable $w0, %bb.2
+  
+  bb.1.cold_block (bbsections Cold):
+    BL @baz, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+    early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0)
+    TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
+  
+  bb.2.hot_block:
+    BL @bar, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+    early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0)
+    TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
+
+...
+---
+name:            tbz_tramp_pushed_oob
+tracksRegLiveness: true
+liveins:
+  - { reg: '$w0', virtual-reg: '' }
+  - { reg: '$w1', virtual-reg: '' }
+stack:
+  - { id: 0, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16,
+      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+machineFunctionInfo:
+  hasRedZone:      false
+body:             |
+  ; INDIRECT-LABEL: name: tbz_tramp_pushed_oob
+  ; COM: Check that a conditional branch to a trampoline is properly relaxed
+  ; COM:   if the trampoline is pushed out of range.
+  ; INDIRECT:      bb.0.entry:
+  ; INDIRECT-NEXT:   successors: %bb.1(0x40000000), %[[TRAMP1:bb.[0-9]+]](0x40000000)
+  ; INDIRECT:        TBNZW
+  ; INDIRECT-SAME:         %bb.1
+  ; INDIRECT-NEXT:    B{{ }}
+  ; INDIRECT-SAME:           %[[TRAMP1]]
+  ; INDIRECT:      bb.1.unrelaxable:
+  ; INDIRECT-NEXT:   successors: %bb.2(0x40000000), %[[TRAMP2:bb.[0-9]+]](0x40000000)
+  ; INDIRECT:        TBNZW
+  ; INDIRECT-SAME:         %bb.2
+  ; INDIRECT:      [[TRAMP2]]
+  ; INDIRECT-NEXT:   successors: %bb.3(0x80000000)
+  ; INDIRECT:      bb.2.end:
+  ; INDIRECT:        TCRETURNdi
+  ; INDIRECT:      [[TRAMP1]].entry:
+  ; INDIRECT:        successors: %bb.3(0x80000000)
+  ; INDIRECT-NOT:  bbsections Cold
+  ; INDIRECT:      bb.3.cold (bbsections Cold):
+  ; INDIRECT:        TCRETURNdi
+
+  bb.0.entry (%ir-block.entry):
+    successors: %bb.1(0x40000000), %bb.3(0x40000000)
+    liveins: $w0, $w1, $lr
+
+    early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0)
+    INLINEASM &"mov x16, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x16
+    TBZW killed renamable $w0, 0, %bb.3
+
+  bb.1.unrelaxable:
+    successors: %bb.2(0x40000000), %bb.3(0x40000000)
+    liveins: $w1, $x16
+
+    TBNZW killed renamable $w1, 0, %bb.2
+
+    B %bb.3
+
+  bb.2.end:
+    liveins: $x16
+
+    INLINEASM &".space 996", 1 /* sideeffect attdialect */
+    INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x16
+    early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0)
+    TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
+  
+  bb.3.cold (bbsections Cold):
+    liveins: $x16
+
+    INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x16
+    early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0)
+    TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
+
+...


        


More information about the llvm-commits mailing list