[PATCH] D94542: [X86] Default to -x86-pad-for-align=false to drop assembler difference with or w/o -g

Fangrui Song via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 12 12:15:26 PST 2021


MaskRay created this revision.
MaskRay added reviewers: reames, skan.
Herald added subscribers: pengfei, hiraditya.
MaskRay requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Fix the assembler part of PR42138: the D75203 <https://reviews.llvm.org/D75203> assembler optimization
locates the MCRelaxableFragments between two MCSymbols and relaxes some of
them to reduce the size of an MCAlignFragment.
A -g build has more MCSymbols and therefore may produce different assembler output
(e.g. an MCRelaxableFragment (jmp) may be 5 bytes with -O1 but 2 bytes with -O1 -g).

`.p2align 4, 0x90` is common because of loops. For a larger program with a
lot of temporary labels, a difference in the assembly output is almost
inevitable. The cost seems to outweigh the benefits, so we default to
-x86-pad-for-align=false until the heuristic is improved.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D94542

Files:
  llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
  llvm/test/MC/X86/align-via-padding-corner.s
  llvm/test/MC/X86/align-via-padding.s
  llvm/test/MC/X86/align-via-relaxation.s
  llvm/test/MC/X86/prefix-padding-32.s
  llvm/test/MC/X86/prefix-padding-64.s


Index: llvm/test/MC/X86/prefix-padding-64.s
===================================================================
--- llvm/test/MC/X86/prefix-padding-64.s
+++ llvm/test/MC/X86/prefix-padding-64.s
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -mcpu=skylake -filetype=obj -triple x86_64-pc-linux-gnu %s -x86-pad-max-prefix-size=15 | llvm-objdump -d --section=.text - | FileCheck %s
+# RUN: llvm-mc -mcpu=skylake -filetype=obj -triple x86_64-pc-linux-gnu %s -x86-pad-max-prefix-size=15 -x86-pad-for-align=1 | llvm-objdump -d - | FileCheck %s
 
 # Check prefix padding generation for all cases on 64 bit x86.
 
Index: llvm/test/MC/X86/prefix-padding-32.s
===================================================================
--- llvm/test/MC/X86/prefix-padding-32.s
+++ llvm/test/MC/X86/prefix-padding-32.s
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -x86-pad-max-prefix-size=15 | llvm-objdump -d --section=.text - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -x86-pad-max-prefix-size=15 -x86-pad-for-align=1 | llvm-objdump -d - | FileCheck %s
 
 # Check prefix padding generation for all cases on 32 bit x86.
 
Index: llvm/test/MC/X86/align-via-relaxation.s
===================================================================
--- llvm/test/MC/X86/align-via-relaxation.s
+++ llvm/test/MC/X86/align-via-relaxation.s
@@ -1,4 +1,5 @@
-# RUN: llvm-mc -mcpu=skylake -filetype=obj -triple x86_64-pc-linux-gnu -x86-pad-max-prefix-size=0 %s | llvm-objdump -d --section=.text - | FileCheck %s
+# RUN: llvm-mc -mcpu=skylake -filetype=obj -triple x86_64-pc-linux-gnu -x86-pad-max-prefix-size=0 %s | llvm-objdump -d - | FileCheck %s --check-prefix=NOPAD
+# RUN: llvm-mc -mcpu=skylake -filetype=obj -triple x86_64-pc-linux-gnu -x86-pad-max-prefix-size=0 -x86-pad-for-align=1 %s | llvm-objdump -d - | FileCheck %s
 
 # This test exercises only the padding via relaxation logic.  The  interaction
 # etween prefix padding and relaxation logic can be seen in align-via-padding.s
@@ -6,6 +7,19 @@
   .file "test.c"
   .text
   .section  .text
+
+# NOPAD-LABEL: <.text>:
+# NOPAD-NEXT:     0: eb 1f           jmp 0x21 <foo>
+# NOPAD-NEXT:     2: eb 1d           jmp 0x21 <foo>
+# NOPAD-NEXT:     4: eb 1b           jmp 0x21 <foo>
+# NOPAD-NEXT:     6: eb 19           jmp 0x21 <foo>
+# NOPAD-NEXT:     8: eb 17           jmp 0x21 <foo>
+# NOPAD-NEXT:     a: eb 15           jmp 0x21 <foo>
+# NOPAD-NEXT:     c: eb 13           jmp 0x21 <foo>
+# NOPAD-NEXT:     e: 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00  nopw    %cs:(%rax,%rax)
+# NOPAD-NEXT:    1d: 0f 1f 00        nopl (%rax)
+# NOPAD-NEXT:    20: cc              int3
+
   # Demonstrate that we can relax instructions to provide padding, not
   # just insert nops.  jmps are being used for ease of demonstration.
   # CHECK: .text
Index: llvm/test/MC/X86/align-via-padding.s
===================================================================
--- llvm/test/MC/X86/align-via-padding.s
+++ llvm/test/MC/X86/align-via-padding.s
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -mcpu=skylake -filetype=obj -triple x86_64-pc-linux-gnu %s -x86-pad-max-prefix-size=5 | llvm-objdump -d --section=.text - | FileCheck %s
+# RUN: llvm-mc -mcpu=skylake -filetype=obj -triple x86_64-pc-linux-gnu %s -x86-pad-max-prefix-size=5 -x86-pad-for-align=1 | llvm-objdump -d - | FileCheck %s
 
 # This test file highlights the interactions between prefix padding and
 # relaxation padding.
Index: llvm/test/MC/X86/align-via-padding-corner.s
===================================================================
--- llvm/test/MC/X86/align-via-padding-corner.s
+++ llvm/test/MC/X86/align-via-padding-corner.s
@@ -1,4 +1,4 @@
-  # RUN: llvm-mc -mcpu=skylake -filetype=obj -triple x86_64-pc-linux-gnu %s -x86-pad-max-prefix-size=5 | llvm-objdump -d - | FileCheck %s
+  # RUN: llvm-mc -mcpu=skylake -filetype=obj -triple x86_64-pc-linux-gnu %s -x86-pad-max-prefix-size=5 -x86-pad-for-align=1 | llvm-objdump -d - | FileCheck %s
 
 
   # The first test check the correctness cornercase - can't add prefixes on a
Index: llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
===================================================================
--- llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -109,7 +109,7 @@
     cl::desc("Maximum number of prefixes to use for padding"));
 
 cl::opt<bool> X86PadForAlign(
-    "x86-pad-for-align", cl::init(true), cl::Hidden,
+    "x86-pad-for-align", cl::init(false), cl::Hidden,
     cl::desc("Pad previous instructions to implement align directives"));
 
 cl::opt<bool> X86PadForBranchAlign(
@@ -957,6 +957,9 @@
   if (!X86PadForAlign && !X86PadForBranchAlign)
     return;
 
+  // The processed regions are delimited by LabeledFragments. -g may have more
+  // MCSymbols and therefore different relaxation results. X86PadForAlign is
+  // disabled by default to eliminate the -g vs non -g difference.
   DenseSet<MCFragment *> LabeledFragments;
   for (const MCSymbol &S : Asm.symbols())
     LabeledFragments.insert(S.getFragment(false));


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D94542.316195.patch
Type: text/x-patch
Size: 5070 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210112/755f88bb/attachment.bin>


More information about the llvm-commits mailing list