[llvm] 7049cf6 - [BranchAlign] Fix bug w/nop padding for SS manipulation

Mon Mar 2 14:40:31 PST 2020

Author: Philip Reames
Date: 2020-03-02T14:40:25-08:00
New Revision: 7049cf6496c9aa8e355345a3fbea30861e4d2da8

URL: https://github.com/llvm/llvm-project/commit/7049cf6496c9aa8e355345a3fbea30861e4d2da8
DIFF: https://github.com/llvm/llvm-project/commit/7049cf6496c9aa8e355345a3fbea30861e4d2da8.diff

LOG: [BranchAlign] Fix bug w/nop padding for SS manipulation

X86 has several instructions which are documented as enabling interrupts exactly one instruction *after* the one which changes the SS segment register. Inserting a nop between these two instructions allows an interrupt to arrive before the following instruction executes, which changes semantic behaviour.

The list of instructions is documented in "Table 24-3. Format of Interruptibility State" in Volume 3c of the Intel manual. They basically all come down to different ways to write to the SS register.

Differential Revision: https://reviews.llvm.org/D75359

Added: 
    llvm/test/MC/X86/align-branch-64-system.s

Modified: 
    llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 067748fdb1f8..ce48ea2519f8 100644

--- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -373,6 +373,27 @@ bool X86AsmBackend::needAlign(MCObjectStreamer &OS) const {
   return true;
 }
 
+/// Return true if \p Inst has an interrupt delay slot, i.e. interrupts are
+/// enabled only *after* the instruction that follows it.  This covers STI,
+/// POPSS, and segment-register moves whose operand 0 is SS.
+static bool hasInterruptDelaySlot(const MCInst &Inst) {
+  switch (Inst.getOpcode()) {
+  case X86::POPSS16:
+  case X86::POPSS32:
+  case X86::STI:
+    return true;
+
+  case X86::MOV16sr:
+  case X86::MOV32sr:
+  case X86::MOV64sr:
+  case X86::MOV16sm:
+    if (Inst.getOperand(0).getReg() == X86::SS)
+      return true;
+    break; // A move to any other segment register has no delay slot.
+  }
+  return false;
+}
+
 /// Check if the instruction operand needs to be aligned. Padding is disabled
 /// before intruction which may be rewritten by linker(e.g. TLSCALL).
 bool X86AsmBackend::needAlignInst(const MCInst &Inst) const {
@@ -401,7 +422,10 @@ void X86AsmBackend::alignBranchesBegin(MCObjectStreamer &OS,
 
   MCFragment *CF = OS.getCurrentFragment();
   bool NeedAlignFused = AlignBranchType & X86::AlignBranchFused;
-  if (NeedAlignFused && isMacroFused(PrevInst, Inst) && CF) {
+  if (hasInterruptDelaySlot(PrevInst)) {
+    // If this instruction follows an interrupt enabling instruction with a one
+    // instruction delay, inserting a nop would change behavior.
+  } else if (NeedAlignFused && isMacroFused(PrevInst, Inst) && CF) {
     // Macro fusion actually happens and there is no other fragment inserted
     // after the previous instruction. NOP can be emitted in PF to align fused
     // jcc.

diff  --git a/llvm/test/MC/X86/align-branch-64-system.s b/llvm/test/MC/X86/align-branch-64-system.s
new file mode 100644
index 000000000000..b62a4e3e136f
--- /dev/null
+++ b/llvm/test/MC/X86/align-branch-64-system.s
@@ -0,0 +1,68 @@
+  # RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu --x86-align-branch-boundary=32 --x86-align-branch=jmp %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s
+
+  # Exercise cases where we're enabling interrupts with one instruction delay
+  # and thus can't add a nop in between without changing behavior.
+
+  .text
+
+  # CHECK: 1e:       sti
+  # CHECK: 1f:       jmp
+  .p2align  5
+  .rept 30
+  int3
+  .endr
+  sti
+  jmp baz
+
+  # CHECK: 5c:       movq %rax, %ss
+  # CHECK: 5f:       jmp
+  .p2align  5
+  .rept 28
+  int3
+  .endr
+  movq %rax, %ss
+  jmp baz
+
+  # CHECK: 9d:       movl %esi, %ss
+  # CHECK: 9f:       jmp
+  .p2align  5
+  .rept 29
+  int3
+  .endr
+  movl %esi, %ss
+  jmp baz
+
+  # movw and movl are interchangeable since we're only using the low 16 bits.
+  # Both are generated as "MOV Sreg,r/m16**" (8E /r), but disassembled as movl
+  # CHECK: dd:       movl %esi, %ss
+  # CHECK: df:       jmp
+  .p2align  5
+  .rept 29
+  int3
+  .endr
+  movw %si, %ss
+  jmp baz
+
+  # CHECK: 11b:       movw (%esi), %ss
+  # CHECK: 11e:       jmp
+  .p2align  5
+  .rept 27
+  int3
+  .endr
+  movw (%esi), %ss
+  jmp baz
+
+  # CHECK: 15b:      	movw	(%rsi), %ss
+  # CHECK: 15d:     	jmp
+  .p2align  5
+  .rept 27
+  int3
+  .endr
+  movw (%rsi), %ss
+  jmp baz
+
+
+  int3
+  .section ".text.other"
+bar:
+  retq