[PATCH] D75945: Use 15 byte long nops on modern Intel processors

Philip Reames via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 10 10:53:09 PDT 2020


reames created this revision.
reames added reviewers: RKSimon, craig.topper, skan, spatel.
Herald added subscribers: bollu, hiraditya, mcrosier.
Herald added a project: LLVM.

Back in D42616 <https://reviews.llvm.org/D42616>, we switched our default nop length from 15 to 10 bytes because some platforms have painful decode stalls when encountering multiple instruction prefixes.  (The 10-byte limit comes from the fact that nops longer than 8 bytes are formed by padding with prefixes, and some platforms have issues with more than two prefixes.)

Based on Agner's guides, it appears that modern Intel processors (SandyBridge and later) can decode an arbitrary number of prefixes without issue.  Intel's guide only provides nop encodings up to 9 bytes long; I read that as providing a safe default for all their chips.  Older chips and the Atom series have serious decode stalls.  I can't find a conclusive reference beyond those two.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D75945

Files:
  llvm/lib/Target/X86/X86.td
  llvm/test/CodeGen/X86/align-branch-boundary-suppressions.ll
  llvm/test/MC/X86/align-via-relaxation.s
  llvm/test/MC/X86/x86_long_nop.s


Index: llvm/test/MC/X86/x86_long_nop.s
===================================================================
--- llvm/test/MC/X86/x86_long_nop.s
+++ llvm/test/MC/X86/x86_long_nop.s
@@ -17,12 +17,12 @@
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver2 | llvm-objdump -d -no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=nehalem %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=westmere %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
-# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=sandybridge %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
-# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=ivybridge %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
-# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=haswell %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
-# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=broadwell %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
-# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=skylake %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
-# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=skx %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=sandybridge %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=ivybridge %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=haswell %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=broadwell %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=skylake %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=skx %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=knl %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=knm %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
 
Index: llvm/test/MC/X86/align-via-relaxation.s
===================================================================
--- llvm/test/MC/X86/align-via-relaxation.s
+++ llvm/test/MC/X86/align-via-relaxation.s
@@ -32,8 +32,7 @@
   # that would require a further round of relaxation
   # CHECK: <bar>:
   # CHECK: 22: eb fe                          jmp -2 <bar>
-  # CHECK: 24: 66 2e 0f 1f 84 00 00 00 00 00  nopw %cs:(%rax,%rax)
-  # CHECK: 2e: 66 90                          nop
+  # CHECK: 24: 66 66 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:(%rax,%rax)
   # CHECK: 30: 0f 0b                          ud2
 
 bar:  
@@ -48,8 +47,8 @@
   # CHECK: <loop_preheader>:
   # CHECK: 45: 48 85 c0                       testq %rax, %rax
   # CHECK: 48: 0f 8e 22 00 00 00              jle 34 <loop_exit>
-  # CHECK: 4e: 66 2e 0f 1f 84 00 00 00 00 00  nopw %cs:(%rax,%rax)
-  # CHECK: 58: 0f 1f 84 00 00 00 00 00        nopl (%rax,%rax)
+  # CHECK: 4e: 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:(%rax,%rax)
+  # CHECK: 5d: 0f 1f 00                       nopl (%rax)
   # CHECK: <loop_header>:
   # CHECK: 60: 48 83 e8 01                    subq $1, %rax
   # CHECK: 64: 48 85 c0                       testq %rax, %rax
Index: llvm/test/CodeGen/X86/align-branch-boundary-suppressions.ll
===================================================================
--- llvm/test/CodeGen/X86/align-branch-boundary-suppressions.ll
+++ llvm/test/CodeGen/X86/align-branch-boundary-suppressions.ll
@@ -69,8 +69,12 @@
 ; CHECK-NEXT:    .cfi_def_cfa_register %rbp
 ; CHECK-NEXT:    #noautopadding
 ; CHECK-NEXT:  .Ltmp2:
+; CHECK-NEXT:    .byte 102
+; CHECK-NEXT:    .byte 102
+; CHECK-NEXT:    .byte 102
+; CHECK-NEXT:    .byte 102
+; CHECK-NEXT:    .byte 102
 ; CHECK-NEXT:    nopw %cs:512(%rax,%rax)
-; CHECK-NEXT:    nopl 8(%rax,%rax)
 ; CHECK-NEXT:    #autopadding
 ; CHECK-NEXT:    popq %rbp
 ; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
Index: llvm/lib/Target/X86/X86.td
===================================================================
--- llvm/lib/Target/X86/X86.td
+++ llvm/lib/Target/X86/X86.td
@@ -546,7 +546,8 @@
                                                   FeatureSlow3OpsLEA,
                                                   FeatureFastScalarFSQRT,
                                                   FeatureFastSHLDRotate,
-                                                  FeatureMergeToThreeWayBranch];
+                                                  FeatureMergeToThreeWayBranch,
+                                                  FeatureFast15ByteNOP];
   list<SubtargetFeature> SNBSpecificFeatures = [FeatureSlowUAMem32,
                                                 FeaturePOPCNTFalseDeps];
   list<SubtargetFeature> SNBInheritableFeatures =


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D75945.249436.patch
Type: text/x-patch
Size: 5632 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200310/2a98ee5c/attachment.bin>


More information about the llvm-commits mailing list