[llvm] [AArch64, ELF] Allow implicit $d/$x at section beginning (PR #99703)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 19 13:39:09 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-mc

Author: Fangrui Song (MaskRay)

<details>
<summary>Changes</summary>

The start state of a new section is `EMS_None`, which often leads to a
$d/$x being emitted at offset 0. Introduce an MCTargetOption/cl::opt
"optimize-mapping-symbols" to allow an alternative behavior:

* Set the start state to `EMS_A64` for text sections and `EMS_Data` for non-text sections.
* For text sections, add an ending $x only if the final data is not instructions.
* For non-text sections, add an ending $d only if the final data is not data commands.

```
.section .text1,"ax"
nop
// emit $d
.long 42
// emit $x

.section .text2,"ax"
nop
```

This new behavior decreases the .symtab size significantly:

```
% ~/projects/bloaty/out/debug/bloaty a64-2/bin/clang -- a64-0/bin/clang
    FILE SIZE        VM SIZE
 --------------  --------------
  -5.4% -1.13Mi  [ = ]       0    .strtab
 -50.9% -4.09Mi  [ = ]       0    .symtab
  -4.0% -5.22Mi  [ = ]       0    TOTAL
```

---

This scheme works as long as the user can rule out some error scenarios:

* .text1 assembled using the traditional behavior is combined with .text2 using the new behavior
* A linker script combining non-text sections and text sections


---
Full diff: https://github.com/llvm/llvm-project/pull/99703.diff


5 Files Affected:

- (modified) llvm/include/llvm/MC/MCTargetOptions.h (+2) 
- (modified) llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h (+2) 
- (modified) llvm/lib/MC/MCTargetOptionsCommandFlags.cpp (+7) 
- (modified) llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp (+33-5) 
- (modified) llvm/test/MC/AArch64/mapping-across-sections.s (+10) 


``````````diff
diff --git a/llvm/include/llvm/MC/MCTargetOptions.h b/llvm/include/llvm/MC/MCTargetOptions.h
index 899299fd15246..d16cb38fa0ae2 100644
--- a/llvm/include/llvm/MC/MCTargetOptions.h
+++ b/llvm/include/llvm/MC/MCTargetOptions.h
@@ -64,6 +64,8 @@ class MCTargetOptions {
   // Use CREL relocation format for ELF.
   bool Crel = false;
 
+  bool OptimizeMappingSymbols = false;
+
   // If true, prefer R_X86_64_[REX_]GOTPCRELX to R_X86_64_GOTPCREL on x86-64
   // ELF.
   bool X86RelaxRelocations = true;
diff --git a/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h b/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h
index 9d592446f3ba7..f4b4e2cbcb450 100644
--- a/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h
+++ b/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h
@@ -53,6 +53,8 @@ bool getSaveTempLabels();
 
 bool getCrel();
 
+bool getOptimizeMappingSymbols();
+
 bool getX86RelaxRelocations();
 
 bool getX86Sse2Avx();
diff --git a/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp b/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp
index 813b1194b47cb..f3dcf499eb9f9 100644
--- a/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp
+++ b/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp
@@ -48,6 +48,7 @@ MCOPT(bool, NoDeprecatedWarn)
 MCOPT(bool, NoTypeCheck)
 MCOPT(bool, SaveTempLabels)
 MCOPT(bool, Crel)
+MCOPT(bool, OptimizeMappingSymbols)
 MCOPT(bool, X86RelaxRelocations)
 MCOPT(bool, X86Sse2Avx)
 MCOPT(std::string, ABIName)
@@ -134,6 +135,11 @@ llvm::mc::RegisterMCTargetOptionsFlags::RegisterMCTargetOptionsFlags() {
                             cl::desc("Use CREL relocation format for ELF"));
   MCBINDOPT(Crel);
 
+  static cl::opt<bool> OptimizeMappingSymbols(
+      "optimize-mapping-symbols",
+      cl::desc("Allow mapping symbol at section beginning to be implicit"));
+  MCBINDOPT(OptimizeMappingSymbols);
+
   static cl::opt<bool> X86RelaxRelocations(
       "x86-relax-relocations",
       cl::desc(
@@ -174,6 +180,7 @@ MCTargetOptions llvm::mc::InitMCTargetOptionsFromFlags() {
   Options.MCNoTypeCheck = getNoTypeCheck();
   Options.MCSaveTempLabels = getSaveTempLabels();
   Options.Crel = getCrel();
+  Options.OptimizeMappingSymbols = getOptimizeMappingSymbols();
   Options.X86RelaxRelocations = getX86RelaxRelocations();
   Options.X86Sse2Avx = getX86Sse2Avx();
   Options.EmitDwarfUnwind = getEmitDwarfUnwind();
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index bfeca4bd5a92d..6bd118b39e043 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -32,8 +32,10 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCTargetOptions.h"
 #include "llvm/MC/MCWinCOFFStreamer.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TargetParser/Triple.h"
@@ -176,19 +178,29 @@ void AArch64TargetAsmStreamer::emitInst(uint32_t Inst) {
 /// by MachO. Beware!
 class AArch64ELFStreamer : public MCELFStreamer {
 public:
+  friend AArch64TargetELFStreamer;
   AArch64ELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
                      std::unique_ptr<MCObjectWriter> OW,
                      std::unique_ptr<MCCodeEmitter> Emitter)
       : MCELFStreamer(Context, std::move(TAB), std::move(OW),
                       std::move(Emitter)),
-        MappingSymbolCounter(0), LastEMS(EMS_None) {}
+        MappingSymbolCounter(0), LastEMS(EMS_None) {
+    auto *TO = getContext().getTargetOptions();
+    OptimizeMappingSymbols = TO && TO->OptimizeMappingSymbols;
+  }
 
   void changeSection(MCSection *Section, uint32_t Subsection = 0) override {
-    // We have to keep track of the mapping symbol state of any sections we
-    // use. Each one should start off as EMS_None, which is provided as the
-    // default constructor by DenseMap::lookup.
+    // We have to keep track of the mapping symbol state of any sections we use.
+    // The initial state is EMS_A64 for text sections and EMS_Data for the
+    // others.
     LastMappingSymbols[getCurrentSection().first] = LastEMS;
-    LastEMS = LastMappingSymbols.lookup(Section);
+    auto It = LastMappingSymbols.find(Section);
+    if (It != LastMappingSymbols.end())
+      LastEMS = It->second;
+    else if (OptimizeMappingSymbols)
+      LastEMS = Section->isText() ? EMS_A64 : EMS_Data;
+    else
+      LastEMS = EMS_None;
 
     MCELFStreamer::changeSection(Section, Subsection);
   }
@@ -280,6 +292,7 @@ class AArch64ELFStreamer : public MCELFStreamer {
   }
 
   int64_t MappingSymbolCounter;
+  bool OptimizeMappingSymbols;
 
   DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols;
   ElfMappingSymbol LastEMS;
@@ -304,6 +317,21 @@ void AArch64TargetELFStreamer::finish() {
   AArch64ELFStreamer &S = getStreamer();
   MCContext &Ctx = S.getContext();
   auto &Asm = S.getAssembler();
+
+  // If OptimizeMappingSymbols is specified, ensure that text sections end with
+  // the A64 state while non-text sections end with the data state. When
+  // sections are combined by the linker, the subsequent section will start with
+  // the right state.
+  if (S.OptimizeMappingSymbols) {
+    for (MCSection &Sec : Asm) {
+      S.switchSection(&Sec);
+      if (Sec.isText())
+        S.emitA64MappingSymbol();
+      else
+        S.emitDataMappingSymbol();
+    }
+  }
+
   MCSectionELF *MemtagSec = nullptr;
   for (const MCSymbol &Symbol : Asm.symbols()) {
     const auto &Sym = cast<MCSymbolELF>(Symbol);
diff --git a/llvm/test/MC/AArch64/mapping-across-sections.s b/llvm/test/MC/AArch64/mapping-across-sections.s
index 6bb5a8811b57d..49151326caf6c 100644
--- a/llvm/test/MC/AArch64/mapping-across-sections.s
+++ b/llvm/test/MC/AArch64/mapping-across-sections.s
@@ -1,4 +1,5 @@
 // RUN: llvm-mc -triple=aarch64 -filetype=obj %s | llvm-objdump -t - | FileCheck %s
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -optimize-mapping-symbols %s | llvm-objdump -t - | FileCheck %s --check-prefix=CHECK1
 
 .section .text1,"ax"
 add w0, w0, w0
@@ -38,3 +39,12 @@ add w0, w0, w0
 // CHECK-NEXT: 0000000000000004 l       .rodata        0000000000000000 $x.7
 // CHECK-NEXT: 0000000000000000 l       .comment       0000000000000000 $d.8
 // CHECK-NOT:  {{.}}
+
+// CHECK1:      SYMBOL TABLE:
+// CHECK1-NEXT: 0000000000000004 l       .text  0000000000000000 $d.0
+// CHECK1-NEXT: 0000000000000008 l       .text  0000000000000000 $x.1
+// CHECK1-NEXT: 000000000000000c l       .text  0000000000000000 $d.2
+// CHECK1-NEXT: 0000000000000004 l       .rodata        0000000000000000 $x.3
+// CHECK1-NEXT: 0000000000000010 l       .text  0000000000000000 $x.4
+// CHECK1-NEXT: 0000000000000008 l       .rodata        0000000000000000 $d.5
+// CHECK-NOT:  {{.}}

``````````

</details>


https://github.com/llvm/llvm-project/pull/99703


More information about the llvm-commits mailing list