[llvm] r355529 - [PPC] Adjust the computed branch offset for the possible shorter distance

Guozhi Wei via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 6 10:22:22 PST 2019


Author: carrot
Date: Wed Mar  6 10:22:22 2019
New Revision: 355529

URL: http://llvm.org/viewvc/llvm-project?rev=355529&view=rev
Log:
[PPC] Adjust the computed branch offset for the possible shorter distance

In PPCBranchSelector.cpp we tend to overestimate code size because of large
alignments and inline assembly. Usually this yields a computed branch offset
that is larger than the actual one, which is not a big problem. But sometimes
it can also yield a computed branch offset that is smaller than the actual
branch offset. If the offset is close to the encoding limit, this may cause a
problem at run time.
The following is a simplified example.

           actual        estimated
           address        address
 ...
bne Far      100            10c
.p2align 4
Near:        110            110
 ...
Far:        8108           8108

Actual offset:    0x8108 - 0x100 = 0x8008
Computed offset:  0x8108 - 0x10c = 0x7ffc

The computed offset is at most ((1 << alignment) - 4) bytes smaller than the
actual offset, where alignment is the largest (log2) block alignment found
between the branch and its target. So we add this number to the offset for
safety.

Differential Revision: https://reviews.llvm.org/D57718


Added:
    llvm/trunk/test/CodeGen/PowerPC/branch_selector.ll
Modified:
    llvm/trunk/lib/Target/PowerPC/PPCBranchSelector.cpp

Modified: llvm/trunk/lib/Target/PowerPC/PPCBranchSelector.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCBranchSelector.cpp?rev=355529&r1=355528&r2=355529&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCBranchSelector.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCBranchSelector.cpp Wed Mar  6 10:22:22 2019
@@ -25,6 +25,7 @@
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetMachine.h"
+#include <algorithm>
 using namespace llvm;
 
 #define DEBUG_TYPE "ppc-branch-select"
@@ -75,9 +76,11 @@ bool PPCBSel::runOnMachineFunction(Machi
   // Give the blocks of the function a dense, in-order, numbering.
   Fn.RenumberBlocks();
   BlockSizes.resize(Fn.getNumBlockIDs());
+  // The first block number which has imprecise instruction address.
+  int FirstImpreciseBlock = -1;
 
-  auto GetAlignmentAdjustment =
-    [](MachineBasicBlock &MBB, unsigned Offset) -> unsigned {
+  auto GetAlignmentAdjustment = [&FirstImpreciseBlock]
+      (MachineBasicBlock &MBB, unsigned Offset) -> unsigned {
     unsigned Align = MBB.getAlignment();
     if (!Align)
       return 0;
@@ -90,6 +93,8 @@ bool PPCBSel::runOnMachineFunction(Machi
 
     // The alignment of this MBB is larger than the function's alignment, so we
     // can't tell whether or not it will insert nops. Assume that it will.
+    if (FirstImpreciseBlock < 0)
+      FirstImpreciseBlock = MBB.getNumber();
     return AlignAmt + OffsetToAlignment(Offset, AlignAmt);
   };
 
@@ -123,8 +128,11 @@ bool PPCBSel::runOnMachineFunction(Machi
     }
 
     unsigned BlockSize = 0;
-    for (MachineInstr &MI : *MBB)
+    for (MachineInstr &MI : *MBB) {
       BlockSize += TII->getInstSizeInBytes(MI);
+      if (MI.isInlineAsm() && (FirstImpreciseBlock < 0))
+        FirstImpreciseBlock = MBB->getNumber();
+    }
 
     BlockSizes[MBB->getNumber()].first = BlockSize;
     FuncSize += BlockSize;
@@ -178,23 +186,87 @@ bool PPCBSel::runOnMachineFunction(Machi
         // Determine the offset from the current branch to the destination
         // block.
         int BranchSize;
+        unsigned MaxAlign = 2;
+        bool NeedExtraAdjustment = false;
         if (Dest->getNumber() <= MBB.getNumber()) {
           // If this is a backwards branch, the delta is the offset from the
           // start of this block to this branch, plus the sizes of all blocks
           // from this block to the dest.
           BranchSize = MBBStartOffset;
+          MaxAlign = std::max(MaxAlign, MBB.getAlignment());
 
-          for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i)
+          int DestBlock = Dest->getNumber();
+          BranchSize += BlockSizes[DestBlock].first;
+          for (unsigned i = DestBlock+1, e = MBB.getNumber(); i < e; ++i) {
             BranchSize += BlockSizes[i].first;
+            MaxAlign = std::max(MaxAlign,
+                                Fn.getBlockNumbered(i)->getAlignment());
+          }
+
+          NeedExtraAdjustment = (FirstImpreciseBlock >= 0) &&
+                                (DestBlock >= FirstImpreciseBlock);
         } else {
           // Otherwise, add the size of the blocks between this block and the
           // dest to the number of bytes left in this block.
-          BranchSize = -MBBStartOffset;
+          unsigned StartBlock = MBB.getNumber();
+          BranchSize = BlockSizes[StartBlock].first - MBBStartOffset;
 
-          for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i)
+          MaxAlign = std::max(MaxAlign, Dest->getAlignment());
+          for (unsigned i = StartBlock+1, e = Dest->getNumber(); i != e; ++i) {
             BranchSize += BlockSizes[i].first;
+            MaxAlign = std::max(MaxAlign,
+                                Fn.getBlockNumbered(i)->getAlignment());
+          }
+
+          NeedExtraAdjustment = (FirstImpreciseBlock >= 0) &&
+                                (MBB.getNumber() >= FirstImpreciseBlock);
         }
 
+        // We tend to overestimate code size due to large alignment and
+        // inline assembly. Usually that makes the computed branch offset
+        // larger than the actual one. But sometimes it can also make the
+        // computed offset smaller than the actual branch offset. If the
+        // offset is close to the encoding limit, this may cause a problem
+        // at run time. The following is a simplified example.
+        //
+        //              actual        estimated
+        //              address        address
+        //    ...
+        //   bne Far      100            10c
+        //   .p2align 4
+        //   Near:        110            110
+        //    ...
+        //   Far:        8108           8108
+        //
+        //   Actual offset:    0x8108 - 0x100 = 0x8008
+        //   Computed offset:  0x8108 - 0x10c = 0x7ffc
+        //
+        // This example also shows when we get the largest gap between the
+        // estimated offset and the actual offset. If there is an aligned
+        // block ABB between the branch and the target, assume its alignment
+        // is <align> bits. Now consider the accumulated function size FSIZE
+        // up to the end of the previous block PBB. If the estimated FSIZE is
+        // a multiple of 2^<align>, the estimated address of ABB needs no
+        // padding. If the actual FSIZE at the end of PBB is 4 bytes more than
+        // a multiple of 2^<align>, then (2^<align> - 4) bytes of padding are
+        // needed, so the actual branch offset is (2^<align> - 4) bytes larger
+        // than the computed offset. Any other actual FSIZE needs fewer padding
+        // bytes, and so gives a smaller gap between the two offsets.
+        //
+        // On the other hand, if the inline asm or large alignment occurs
+        // between the branch block and destination block, the estimated address
+        // can be <delta> larger than actual address. If padding bytes are
+        // needed for a later aligned block, the actual number of padding bytes
+        // is at most <delta> more than estimated padding bytes. So the actual
+        // aligned block address is less than or equal to the estimated aligned
+        // block address. So the actual branch offset is less than or equal to
+        // computed branch offset.
+        //
+        // The computed offset is at most ((1 << alignment) - 4) bytes smaller
+        // than actual offset. So we add this number to the offset for safety.
+        if (NeedExtraAdjustment)
+          BranchSize += (1 << MaxAlign) - 4;
+
         // If this branch is in range, ignore it.
         if (isInt<16>(BranchSize)) {
           MBBStartOffset += 4;

Added: llvm/trunk/test/CodeGen/PowerPC/branch_selector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/branch_selector.ll?rev=355529&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/branch_selector.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/branch_selector.ll Wed Mar  6 10:22:22 2019
@@ -0,0 +1,45 @@
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -verify-machineinstrs < %s | FileCheck %s
+
+define i32 @relax_bcc(i1 %b) {
+; CHECK-LABEL: relax_bcc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andi. 3, 3, 1
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:  label:
+; CHECK-NEXT:    add 3, 3, 5
+; CHECK-NEXT:    cmpd    4, 3
+; CHECK-NEXT:    bne     0, label
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    bc 12, 1, .+8
+; CHECK-NEXT:    b .LBB0_4
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    li 3, 101
+; CHECK-NEXT:    mtctr 3
+; CHECK-NEXT:    .p2align        4
+; CHECK-NEXT:  .LBB0_2:
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    bdnz .LBB0_2
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    .space 32748
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:  .LBB0_4: # %tail
+; CHECK-NEXT:    li 3, 1
+; CHECK-NEXT:    blr
+entry:
+  call void asm sideeffect "label:\0A\09add 3,3,5\0A\09cmpd 4,3\0A\09bne label", ""()
+  br i1 %b, label %for.body, label %tail
+
+for.body:                                         ; preds = %for.body, %entry
+   %0 = phi i32 [0, %entry], [%1, %for.body]
+   %1 = add i32 %0, 1
+   %2 = icmp sgt i32 %1, 100
+   br i1 %2, label %exit, label %for.body
+
+exit:
+  call void asm sideeffect ".space 32748", ""()
+  br label %tail
+
+tail:
+  ret i32 1
+}




More information about the llvm-commits mailing list