[llvm] 29ccb12 - [BranchAlign] Compiler support for suppressing branch align

Wed Jan 8 10:03:36 PST 2020

Author: Philip Reames
Date: 2020-01-08T10:03:30-08:00
New Revision: 29ccb12e2c12b6a50a1451ffdbf70fef29efda0e

URL: https://github.com/llvm/llvm-project/commit/29ccb12e2c12b6a50a1451ffdbf70fef29efda0e
DIFF: https://github.com/llvm/llvm-project/commit/29ccb12e2c12b6a50a1451ffdbf70fef29efda0e.diff

LOG: [BranchAlign] Compiler support for suppressing branch align

As discussed heavily in the original review (D70157), there's a need for the compiler to be able to selective suppress padding (either nop or prefix) to respect assumptions about the meaning of labels and instructions in generated code.

Rather than wait for syntax to be finalized - which appears to be a very slow process - this patch focuses on the compiler use case and *only* worries about the integrated assembler. To my knowledge, this covers all cases mentioned to date for clang/JIT support.

For testing purposes, I wired it up so that if the integrated assembler was using autopadding for branch alignment (e.g. enabled at command line) then the textual assembly output would contain a comment for each location where padding was enabled or disabled. This seemed like the least painful choice overall.

Note that the result of this patch effective disables the jcc errata mitigation for many constructs (statepoints, implicit null checks, xray, etc...) which is non ideal. It is at least *correct* and should allow us to enable the mitigation for the compiler. Once that's done, and a few other items are worked through, we probably want to come back to this an explore a bundling based approach instead so that we can pad instructions while keeping labels in the right place.

Differential Revision: https://reviews.llvm.org/D72303

Added: 
    llvm/test/CodeGen/X86/align-branch-boundary-noautopadding.ll
    llvm/test/CodeGen/X86/align-branch-boundary-suppressions.ll

Modified: 
    llvm/include/llvm/MC/MCAsmBackend.h
    llvm/include/llvm/MC/MCStreamer.h
    llvm/lib/MC/MCAsmStreamer.cpp
    llvm/lib/MC/MCObjectStreamer.cpp
    llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
    llvm/lib/Target/X86/X86MCInstLower.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h
index 279c2300565e..ed7d5c7f01f4 100644

--- a/llvm/include/llvm/MC/MCAsmBackend.h
+++ b/llvm/include/llvm/MC/MCAsmBackend.h
@@ -46,6 +46,10 @@ class MCAsmBackend {
 
   const support::endianness Endian;
 
+  /// Return true if this target might automatically pad instructions and thus
+  /// need to emit padding enable/disable directives around sensative code.
+  virtual bool allowAutoPadding() const { return false; }
+
   /// Give the target a chance to manipulate state related to instruction
   /// alignment (e.g. padding for optimization) before and after actually
   /// emitting the instruction.

diff  --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h
index aab511a1e457..ba1649d33d12 100644
--- a/llvm/include/llvm/MC/MCStreamer.h
+++ b/llvm/include/llvm/MC/MCStreamer.h
@@ -223,6 +223,13 @@ class MCStreamer {
 
   bool UseAssemblerInfoForParsing;
 
+  /// Is the assembler allowed to insert padding automatically?  For
+  /// correctness reasons, we sometimes need to ensure instructions aren't
+  /// seperated in unexpected ways.  At the moment, this feature is only
+  /// useable from an integrated assembler, but assembly syntax is under
+  /// discussion for future inclusion.
+  bool AllowAutoPadding = false;
+
 protected:
   MCStreamer(MCContext &Ctx);
 
@@ -267,6 +274,9 @@ class MCStreamer {
     return TargetStreamer.get();
   }
 
+  void setAllowAutoPadding(bool v) { AllowAutoPadding = v; }
+  bool getAllowAutoPadding() const { return AllowAutoPadding; }
+
   /// When emitting an object file, create and emit a real label. When emitting
   /// textual assembly, this should do nothing to avoid polluting our output.
   virtual MCSymbol *EmitCFILabel();

diff  --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp
index d220c650b8eb..5d369503995b 100644
--- a/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/llvm/lib/MC/MCAsmStreamer.cpp
@@ -77,6 +77,8 @@ class MCAsmStreamer final : public MCStreamer {
     assert(InstPrinter);
     if (IsVerboseAsm)
         InstPrinter->setCommentStream(CommentStream);
+    if (Assembler->getBackendPtr())
+      setAllowAutoPadding(Assembler->getBackend().allowAutoPadding());
   }
 
   MCAssembler &getAssembler() { return *Assembler; }

diff  --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp
index d8a7ab043eb8..3d1358df475f 100644
--- a/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/llvm/lib/MC/MCObjectStreamer.cpp
@@ -29,7 +29,10 @@ MCObjectStreamer::MCObjectStreamer(MCContext &Context,
     : MCStreamer(Context),
       Assembler(std::make_unique<MCAssembler>(
           Context, std::move(TAB), std::move(Emitter), std::move(OW))),
-      EmitEHFrame(true), EmitDebugFrame(false) {}
+      EmitEHFrame(true), EmitDebugFrame(false) {
+  if (Assembler->getBackendPtr())
+    setAllowAutoPadding(Assembler->getBackend().allowAutoPadding());
+}
 
 MCObjectStreamer::~MCObjectStreamer() {}
 

diff  --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 623aff088b17..85e24dc98f59 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -155,6 +155,7 @@ class X86AsmBackend : public MCAsmBackend {
     AlignBranchType = X86AlignBranchKindLoc;
   }
 
+  bool allowAutoPadding() const override;
   void alignBranchesBegin(MCObjectStreamer &OS, const MCInst &Inst) override;
   void alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) override;
 
@@ -410,10 +411,15 @@ static bool hasVariantSymbol(const MCInst &MI) {
   return false;
 }
 
+bool X86AsmBackend::allowAutoPadding() const {
+  return (AlignBoundary != Align::None() &&
+          AlignBranchType != X86::AlignBranchNone);
+}
+
 bool X86AsmBackend::needAlign(MCObjectStreamer &OS) const {
-  if (AlignBoundary == Align::None() ||
-      AlignBranchType == X86::AlignBranchNone)
+  if (!OS.getAllowAutoPadding())
     return false;
+  assert(allowAutoPadding() && "incorrect initialization!");
 
   MCAssembler &Assembler = OS.getAssembler();
   MCSection *Sec = OS.getCurrentSectionOnly();

diff  --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index e6b6bd70faf4..3cb911563c2a 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -1142,10 +1142,35 @@ static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
   }
 }
 
+/// A RAII helper which defines a region of instructions which can't have
+/// padding added between them for correctness.
+struct NoAutoPaddingScope {
+  MCStreamer &OS;
+  const bool OldAllowAutoPadding;
+  NoAutoPaddingScope(MCStreamer &OS)
+    : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) {
+    changeAndComment(false);
+  }
+  ~NoAutoPaddingScope() {
+    changeAndComment(OldAllowAutoPadding);
+  }
+  void changeAndComment(bool b) {
+    if (b == OS.getAllowAutoPadding())
+      return;
+    OS.setAllowAutoPadding(b);
+    if (b)
+      OS.emitRawComment("autopadding");
+    else
+      OS.emitRawComment("noautopadding");
+  }
+};
+
 void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
                                     X86MCInstLower &MCIL) {
   assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");
 
+  NoAutoPaddingScope NoPadScope(*OutStreamer);
+
   StatepointOpers SOpers(&MI);
   if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
     EmitNops(*OutStreamer, PatchBytes, Subtarget->is64Bit(),
@@ -1207,6 +1232,8 @@ void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
   // FAULTING_LOAD_OP <def>, <faltinf type>, <MBB handler>,
   //                  <opcode>, <operands>
 
+  NoAutoPaddingScope NoPadScope(*OutStreamer);
+
   Register DefRegister = FaultingMI.getOperand(0).getReg();
   FaultMaps::FaultKind FK =
       static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
@@ -1254,6 +1281,8 @@ void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
                                       X86MCInstLower &MCIL) {
   // PATCHABLE_OP minsize, opcode, operands
 
+  NoAutoPaddingScope NoPadScope(*OutStreamer);
+
   unsigned MinSize = MI.getOperand(0).getImm();
   unsigned Opcode = MI.getOperand(1).getImm();
 
@@ -1309,6 +1338,8 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
 
   SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
 
+  NoAutoPaddingScope NoPadScope(*OutStreamer);
+
   auto &Ctx = OutStreamer->getContext();
   MCSymbol *MILabel = Ctx.createTempSymbol();
   OutStreamer->EmitLabel(MILabel);
@@ -1368,6 +1399,8 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
                                               X86MCInstLower &MCIL) {
   assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64");
 
+  NoAutoPaddingScope NoPadScope(*OutStreamer);
+
   // We want to emit the following pattern, which follows the x86 calling
   // convention to prepare for the trampoline call to be patched in.
   //
@@ -1462,6 +1495,8 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
                                                     X86MCInstLower &MCIL) {
   assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64");
 
+  NoAutoPaddingScope NoPadScope(*OutStreamer);
+
   // We want to emit the following pattern, which follows the x86 calling
   // convention to prepare for the trampoline call to be patched in.
   //
@@ -1559,6 +1594,9 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
 
 void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
                                                   X86MCInstLower &MCIL) {
+
+  NoAutoPaddingScope NoPadScope(*OutStreamer);
+
   // We want to emit the following pattern:
   //
   //   .p2align 1, ...
@@ -1586,6 +1624,8 @@ void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
 
 void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
                                        X86MCInstLower &MCIL) {
+  NoAutoPaddingScope NoPadScope(*OutStreamer);
+
   // Since PATCHABLE_RET takes the opcode of the return statement as an
   // argument, we use that to emit the correct form of the RET that we want.
   // i.e. when we see this:
@@ -1616,6 +1656,8 @@ void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
 
 void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
                                              X86MCInstLower &MCIL) {
+  NoAutoPaddingScope NoPadScope(*OutStreamer);
+
   // Like PATCHABLE_RET, we have the actual instruction in the operands to this
   // instruction so we lower that particular instruction and its operands.
   // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how

diff  --git a/llvm/test/CodeGen/X86/align-branch-boundary-noautopadding.ll b/llvm/test/CodeGen/X86/align-branch-boundary-noautopadding.ll
new file mode 100644
index 000000000000..bebbea199bed
--- /dev/null
+++ b/llvm/test/CodeGen/X86/align-branch-boundary-noautopadding.ll
@@ -0,0 +1,36 @@
+; RUN: llc -verify-machineinstrs -O3 -mcpu=skylake -x86-align-branch-boundary=32 -x86-align-branch=call -filetype=obj < %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s
+
+;; This file is a companion to align-branch-boundary-suppressions.ll.
+;; It exists to demonstrate that suppressions are actually wired into the
+;; integrated assembler.
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+define void @test_statepoint(i32 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK: 1: callq
+; CHECK-NEXT: 6: callq
+; CHECK-NEXT: b: callq
+; CHECK-NEXT: 10: callq
+; CHECK-NEXT: 15: callq
+; CHECK-NEXT: 1a: callq
+; CHECK-NEXT: 1f: callq
+entry:
+  ; Each of these will be 5 bytes, pushing the statepoint to offset=30.
+  ; For a normal call, this would force padding between the last normal
+  ; call and the safepoint, but since we've suppressed alignment that won't
+  ; happen for the safepoint.  That's non-ideal, we'd really prefer to do
+  ; the alignment and just keep the label with the statepoint call. (TODO)
+  call void @foo()
+  call void @foo()
+  call void @foo()
+  call void @foo()
+  call void @foo()
+  call void @foo()
+  call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0)
+  ret void
+}
+
+declare void @foo()
+declare zeroext i1 @return_i1()
+declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)

diff  --git a/llvm/test/CodeGen/X86/align-branch-boundary-suppressions.ll b/llvm/test/CodeGen/X86/align-branch-boundary-suppressions.ll
new file mode 100644
index 000000000000..7db20187d45d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/align-branch-boundary-suppressions.ll
@@ -0,0 +1,89 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -O3 -enable-implicit-null-checks -mcpu=skylake -x86-align-branch-boundary=32 -x86-align-branch=call+jmp+indirect+ret+jcc < %s | FileCheck %s
+
+;; The tests in this file check that various constructs which need to disable
+;; prefix and/or nop padding do so in the right places.  However, since we
+;; don't yet have assembler syntax for this, they're only able to check
+;; comments and must hope the assembler does the right thing.
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+; If we have autopadding enabled, make sure the label isn't separated from
+; the mov.
+define i32 @implicit_null_check(i32* %x) {
+; CHECK-LABEL: implicit_null_check:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    #noautopadding
+; CHECK-NEXT:  .Ltmp0:
+; CHECK-NEXT:    movl (%rdi), %eax # on-fault: .LBB0_1
+; CHECK-NEXT:    #autopadding
+; CHECK-NEXT:  # %bb.2: # %not_null
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB0_1: # %is_null
+; CHECK-NEXT:    movl $42, %eax
+; CHECK-NEXT:    retq
+
+ entry:
+  %c = icmp eq i32* %x, null
+  br i1 %c, label %is_null, label %not_null, !make.implicit !{}
+
+ is_null:
+  ret i32 42
+
+ not_null:
+  %t = load atomic i32, i32* %x unordered, align 4
+  ret i32 %t
+}
+
+; Label must bind to call before
+define void @test_statepoint(i32 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: test_statepoint:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    #noautopadding
+; CHECK-NEXT:    callq return_i1
+; CHECK-NEXT:  .Ltmp1:
+; CHECK-NEXT:    #autopadding
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0)
+  ret void
+}
+
+declare zeroext i1 @return_i1()
+declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
+
+
+; Label must bind to following nop sequence
+define void @patchpoint(i64 %a, i64 %b) {
+; CHECK-LABEL: patchpoint:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset %rbp, -16
+; CHECK-NEXT:    movq %rsp, %rbp
+; CHECK-NEXT:    .cfi_def_cfa_register %rbp
+; CHECK-NEXT:    #noautopadding
+; CHECK-NEXT:  .Ltmp2:
+; CHECK-NEXT:    .byte 102
+; CHECK-NEXT:    .byte 102
+; CHECK-NEXT:    .byte 102
+; CHECK-NEXT:    .byte 102
+; CHECK-NEXT:    .byte 102
+; CHECK-NEXT:    nopw %cs:512(%rax,%rax)
+; CHECK-NEXT:    #autopadding
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
+; CHECK-NEXT:    retq
+entry:
+  call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 4, i32 15, i8* null, i32 0, i64 %a, i64 %b)
+  ret void
+}
+
+
+declare void @llvm.experimental.stackmap(i64, i32, ...)
+declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)