[llvm] 3a503ce - [X86] Reduce the number of emitted fragments due to branch align

Shengchen Kan via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 12 00:37:55 PDT 2020


Author: Shengchen Kan
Date: 2020-03-12T15:37:35+08:00
New Revision: 3a503ce66318ed65d071f6401af5750640d33444

URL: https://github.com/llvm/llvm-project/commit/3a503ce66318ed65d071f6401af5750640d33444
DIFF: https://github.com/llvm/llvm-project/commit/3a503ce66318ed65d071f6401af5750640d33444.diff

LOG: [X86] Reduce the number of emitted fragments due to branch align

Summary:
Currently, a BoundaryAlign fragment may be inserted after the branch
that needs to be aligned to truncate the current fragment, this fragment is
unused at most of time. To avoid that, we can insert a new empty Data
fragment instead. Non-relaxable instruction is usually emitted into Data
fragment, so the inserted empty Data fragment will be reused at a high
possibility.

Reviewers: annita.zhang, reames, MaskRay, craig.topper, LuoYuanke, jyknight

Reviewed By: reames, LuoYuanke

Subscribers: llvm-commits, dexonsmith, hiraditya

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D75438

Added: 
    

Modified: 
    llvm/include/llvm/MC/MCFragment.h
    llvm/lib/MC/MCAssembler.cpp
    llvm/lib/MC/MCFragment.cpp
    llvm/lib/MC/MCObjectStreamer.cpp
    llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
    llvm/test/MC/X86/align-branch-64-negative.s

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/MC/MCFragment.h b/llvm/include/llvm/MC/MCFragment.h
index e052098611a9..bde0835b6a55 100644
--- a/llvm/include/llvm/MC/MCFragment.h
+++ b/llvm/include/llvm/MC/MCFragment.h
@@ -523,20 +523,16 @@ class MCCVDefRangeFragment : public MCEncodedFragmentWithFixups<32, 4> {
 class MCBoundaryAlignFragment : public MCFragment {
   /// The alignment requirement of the branch to be aligned.
   Align AlignBoundary;
-  /// Flag to indicate whether the branch is fused.  Use in determining the
-  /// region of fragments being aligned.
-  bool Fused : 1;
-  /// Flag to indicate whether NOPs should be emitted.
-  bool EmitNops : 1;
+  /// The last fragment in the set of fragments to be aligned.
+  const MCFragment *LastFragment = nullptr;
   /// The size of the fragment.  The size is lazily set during relaxation, and
   /// is not meaningful before that.
   uint64_t Size = 0;
 
 public:
-  MCBoundaryAlignFragment(Align AlignBoundary = Align(1), bool Fused = false,
-                          bool EmitNops = false, MCSection *Sec = nullptr)
-      : MCFragment(FT_BoundaryAlign, false, Sec), AlignBoundary(AlignBoundary),
-        Fused(Fused), EmitNops(EmitNops) {}
+  MCBoundaryAlignFragment(Align AlignBoundary, MCSection *Sec = nullptr)
+      : MCFragment(FT_BoundaryAlign, false, Sec), AlignBoundary(AlignBoundary) {
+  }
 
   uint64_t getSize() const { return Size; }
   void setSize(uint64_t Value) { Size = Value; }
@@ -544,11 +540,11 @@ class MCBoundaryAlignFragment : public MCFragment {
   Align getAlignment() const { return AlignBoundary; }
   void setAlignment(Align Value) { AlignBoundary = Value; }
 
-  bool isFused() const { return Fused; }
-  void setFused(bool Value) { Fused = Value; }
-
-  bool canEmitNops() const { return EmitNops; }
-  void setEmitNops(bool Value) { EmitNops = Value; }
+  const MCFragment *getLastFragment() const { return LastFragment; }
+  void setLastFragment(const MCFragment *F) {
+    assert(!F || getParent() == F->getParent());
+    LastFragment = F;
+  }
 
   static bool classof(const MCFragment *F) {
     return F->getKind() == MCFragment::FT_BoundaryAlign;

diff  --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp
index b32c9b5fdfac..83f0d62bac2a 100644
--- a/llvm/lib/MC/MCAssembler.cpp
+++ b/llvm/lib/MC/MCAssembler.cpp
@@ -996,27 +996,22 @@ static bool needPadding(uint64_t StartAddr, uint64_t Size,
 
 bool MCAssembler::relaxBoundaryAlign(MCAsmLayout &Layout,
                                      MCBoundaryAlignFragment &BF) {
-  // The MCBoundaryAlignFragment that doesn't emit NOP should not be relaxed.
-  if (!BF.canEmitNops())
+  // BoundaryAlignFragment that doesn't need to align any fragment should not be
+  // relaxed.
+  if (!BF.getLastFragment())
     return false;
 
-  uint64_t AlignedOffset = Layout.getFragmentOffset(BF.getNextNode());
+  uint64_t AlignedOffset = Layout.getFragmentOffset(&BF);
   uint64_t AlignedSize = 0;
-  const MCFragment *F = BF.getNextNode();
-  // If the branch is unfused, it is emitted into one fragment, otherwise it is
-  // emitted into two fragments at most, the next MCBoundaryAlignFragment(if
-  // exists) also marks the end of the branch.
-  for (auto i = 0, N = BF.isFused() ? 2 : 1;
-       i != N && !isa<MCBoundaryAlignFragment>(F); ++i, F = F->getNextNode()) {
+  for (const MCFragment *F = BF.getLastFragment(); F != &BF;
+       F = F->getPrevNode())
     AlignedSize += computeFragmentSize(Layout, *F);
-  }
-  uint64_t OldSize = BF.getSize();
-  AlignedOffset -= OldSize;
+
   Align BoundaryAlignment = BF.getAlignment();
   uint64_t NewSize = needPadding(AlignedOffset, AlignedSize, BoundaryAlignment)
                          ? offsetToAlignment(AlignedOffset, BoundaryAlignment)
                          : 0U;
-  if (NewSize == OldSize)
+  if (NewSize == BF.getSize())
     return false;
   BF.setSize(NewSize);
   Layout.invalidateFragmentsFrom(&BF);

diff  --git a/llvm/lib/MC/MCFragment.cpp b/llvm/lib/MC/MCFragment.cpp
index a96b8e86aed3..79e675c59c6b 100644
--- a/llvm/lib/MC/MCFragment.cpp
+++ b/llvm/lib/MC/MCFragment.cpp
@@ -424,14 +424,9 @@ LLVM_DUMP_METHOD void MCFragment::dump() const {
   }
   case MCFragment::FT_BoundaryAlign: {
     const auto *BF = cast<MCBoundaryAlignFragment>(this);
-    if (BF->canEmitNops())
-      OS << " (can emit nops to align";
-    if (BF->isFused())
-      OS << " fused branch)";
-    else
-      OS << " unfused branch)";
     OS << "\n       ";
     OS << " BoundarySize:" << BF->getAlignment().value()
+       << " LastFragment:" << BF->getLastFragment()
        << " Size:" << BF->getSize();
     break;
   }

diff  --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp
index 33fcc20cb7b7..a36cdc4c1abb 100644
--- a/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/llvm/lib/MC/MCObjectStreamer.cpp
@@ -215,15 +215,6 @@ MCObjectStreamer::getOrCreateDataFragment(const MCSubtargetInfo *STI) {
   return F;
 }
 
-MCBoundaryAlignFragment *MCObjectStreamer::getOrCreateBoundaryAlignFragment() {
-  auto *F = dyn_cast_or_null<MCBoundaryAlignFragment>(getCurrentFragment());
-  if (!F || F->canEmitNops()) {
-    F = new MCBoundaryAlignFragment();
-    insert(F);
-  }
-  return F;
-}
-
 void MCObjectStreamer::visitUsedSymbol(const MCSymbol &Sym) {
   Assembler->registerSymbol(Sym);
 }

diff  --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 426ddb7a29b5..2f84d23d6db5 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -133,6 +133,7 @@ class X86AsmBackend : public MCAsmBackend {
   bool needAlign(MCObjectStreamer &OS) const;
   bool needAlignInst(const MCInst &Inst) const;
   MCInst PrevInst;
+  MCBoundaryAlignFragment *PendingBoundaryAlign = nullptr;
 
 public:
   X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
@@ -493,27 +494,30 @@ bool X86AsmBackend::needAlignInst(const MCInst &Inst) const {
           (AlignBranchType & X86::AlignBranchIndirect));
 }
 
-/// Insert MCBoundaryAlignFragment before instructions to align branches.
+/// Insert BoundaryAlignFragment before instructions to align branches.
 void X86AsmBackend::alignBranchesBegin(MCObjectStreamer &OS,
                                        const MCInst &Inst) {
   if (!needAlign(OS))
     return;
 
-  MCFragment *CF = OS.getCurrentFragment();
-  bool NeedAlignFused = AlignBranchType & X86::AlignBranchFused;
-  if (hasInterruptDelaySlot(PrevInst)) {
+  if (hasInterruptDelaySlot(PrevInst))
     // If this instruction follows an interrupt enabling instruction with a one
     // instruction delay, inserting a nop would change behavior.
-  } else if (NeedAlignFused && isMacroFused(PrevInst, Inst) && CF) {
+    return;
+
+  if (!isMacroFused(PrevInst, Inst))
+    // Macro fusion doesn't happen indeed, clear the pending.
+    PendingBoundaryAlign = nullptr;
+
+  if (PendingBoundaryAlign &&
+      OS.getCurrentFragment()->getPrevNode() == PendingBoundaryAlign) {
     // Macro fusion actually happens and there is no other fragment inserted
-    // after the previous instruction. NOP can be emitted in PF to align fused
-    // jcc.
-    if (auto *PF =
-            dyn_cast_or_null<MCBoundaryAlignFragment>(CF->getPrevNode())) {
-      const_cast<MCBoundaryAlignFragment *>(PF)->setEmitNops(true);
-      const_cast<MCBoundaryAlignFragment *>(PF)->setFused(true);
-    }
-  } else if (needAlignInst(Inst)) {
+    // after the previous instruction.
+    //
+    // Do nothing here since we already inserted a BoudaryAlign fragment when
+    // we met the first instruction in the fused pair and we'll tie them
+    // together in alignBranchesEnd.
+    //
     // Note: When there is at least one fragment, such as MCAlignFragment,
     // inserted after the previous instruction, e.g.
     //
@@ -525,36 +529,38 @@ void X86AsmBackend::alignBranchesBegin(MCObjectStreamer &OS,
     //
     // We will treat the JCC as a unfused branch although it may be fused
     // with the CMP.
-    auto *F = OS.getOrCreateBoundaryAlignFragment();
-    F->setAlignment(AlignBoundary);
-    F->setEmitNops(true);
-    F->setFused(false);
-  } else if (NeedAlignFused && isFirstMacroFusibleInst(Inst, *MCII)) {
-    // We don't know if macro fusion happens until the reaching the next
-    // instruction, so a place holder is put here if necessary.
-    auto *F = OS.getOrCreateBoundaryAlignFragment();
-    F->setAlignment(AlignBoundary);
+    return;
   }
 
-  PrevInst = Inst;
+  if (needAlignInst(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
+                              isFirstMacroFusibleInst(Inst, *MCII))) {
+    // If we meet a unfused branch or the first instuction in a fusiable pair,
+    // insert a BoundaryAlign fragment.
+    OS.insert(PendingBoundaryAlign =
+                  new MCBoundaryAlignFragment(AlignBoundary));
+  }
 }
 
-/// Insert a MCBoundaryAlignFragment to mark the end of the branch to be aligned
-/// if necessary.
+/// Set the last fragment to be aligned for the BoundaryAlignFragment.
 void X86AsmBackend::alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) {
   if (!needAlign(OS))
     return;
-  // If the branch is emitted into a MCRelaxableFragment, we can determine the
-  // size of the branch easily in MCAssembler::relaxBoundaryAlign. When the
-  // branch is fused, the fused branch(macro fusion pair) must be emitted into
-  // two fragments. Or when the branch is unfused, the branch must be emitted
-  // into one fragment. The MCRelaxableFragment naturally marks the end of the
-  // fused or unfused branch.
-  // Otherwise, we need to insert a MCBoundaryAlignFragment to mark the end of
-  // the branch. This MCBoundaryAlignFragment may be reused to emit NOP to align
-  // other branch.
-  if (needAlignInst(Inst) && !isa<MCRelaxableFragment>(OS.getCurrentFragment()))
-    OS.insert(new MCBoundaryAlignFragment(AlignBoundary));
+
+  PrevInst = Inst;
+
+  if (!needAlignInst(Inst) || !PendingBoundaryAlign)
+    return;
+
+  // Tie the aligned instructions into a a pending BoundaryAlign.
+  PendingBoundaryAlign->setLastFragment(OS.getCurrentFragment());
+  PendingBoundaryAlign = nullptr;
+
+  // We need to ensure that further data isn't added to the current
+  // DataFragment, so that we can get the size of instructions later in
+  // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
+  // DataFragment.
+  if (isa_and_nonnull<MCDataFragment>(OS.getCurrentFragment()))
+    OS.insert(new MCDataFragment());
 
   // Update the maximum alignment on the current section if necessary.
   MCSection *Sec = OS.getCurrentSectionOnly();
@@ -862,16 +868,12 @@ void X86AsmBackend::finishLayout(MCAssembler const &Asm,
       // If we're looking at a boundary align, make sure we don't try to pad
       // its target instructions for some following directive.  Doing so would
       // break the alignment of the current boundary align.
-      if (F.getKind() == MCFragment::FT_BoundaryAlign) {
-        auto &BF = cast<MCBoundaryAlignFragment>(F);
-        const MCFragment *F = BF.getNextNode();
-        // If the branch is unfused, it is emitted into one fragment, otherwise
-        // it is emitted into two fragments at most, the next
-        // MCBoundaryAlignFragment(if exists) also marks the end of the branch.
-        for (int i = 0, N = BF.isFused() ? 2 : 1;
-             i != N && !isa<MCBoundaryAlignFragment>(F);
-             ++i, F = F->getNextNode(), I++) {
-        }
+      if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
+        const MCFragment *LastFragment = BF->getLastFragment();
+        if (!LastFragment)
+          continue;
+        while (&*I != LastFragment)
+          ++I;
       }
     }
   }

diff  --git a/llvm/test/MC/X86/align-branch-64-negative.s b/llvm/test/MC/X86/align-branch-64-negative.s
index 54e9e70561e5..87c465ca7e5a 100644
--- a/llvm/test/MC/X86/align-branch-64-negative.s
+++ b/llvm/test/MC/X86/align-branch-64-negative.s
@@ -24,31 +24,13 @@ labeled_call_test1:
 label_before:
   callq bar
 
-  # In the second test, we have a label which is expected to be bound to the
-  # end of the call.  For instance, we want the to associate some data with
-  # the return address of the call.
-  # CHECK-LABEL: <labeled_call_test2>:
-  # CHECK: 5a: callq
-  # CHECK: 5f: nop
-  # CHECK: 60 <label_after>:
-  # CHECK: 60: jmp
-  .globl  labeled_call_test2
-  .p2align  5
-labeled_call_test2:
-  .rept 26
-  int3
-  .endr
-  callq bar
-label_after:
-  jmp bar
-
-  # Our third test is like the first w/a labeled fault, but specifically to
+  # Our second test is like the first w/a labeled fault, but specifically to
   # a fused memory comparison.  This is the form produced by implicit null
   # checks for instance.
   # CHECK-LABEL: <implicit_null_check>:
-  # CHECK: 9f <fault_addr>:
-  # CHECK: 9f: nop
-  # CHECK: a0: cmpq
+  # CHECK: 5f <fault_addr>:
+  # CHECK: 5f: nop
+  # CHECK: 60: cmpq
   .globl  implicit_null_check
   .p2align  5
 implicit_null_check:


        


More information about the llvm-commits mailing list