[llvm-branch-commits] [llvm] [Driver] Add -Wa, options -mmapsyms={default, implicit} (PR #104542)

Fangrui Song via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Aug 22 09:21:00 PDT 2024


https://github.com/MaskRay updated https://github.com/llvm/llvm-project/pull/104542

>From 5a55a8f84d28a70560dec646abd35d84a3a38fe9 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Thu, 15 Aug 2024 19:29:05 -0700
Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20change?=
 =?UTF-8?q?s=20to=20main=20this=20commit=20is=20based=20on?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.5-bogner

[skip ci]
---
 llvm/include/llvm/MC/MCAssembler.h            |  1 +
 llvm/include/llvm/MC/MCTargetOptions.h        |  2 +
 .../llvm/MC/MCTargetOptionsCommandFlags.h     |  2 +
 llvm/lib/MC/MCTargetOptionsCommandFlags.cpp   | 10 +++
 .../MCTargetDesc/AArch64ELFStreamer.cpp       | 76 ++++++++++++++++---
 .../test/MC/AArch64/mapping-across-sections.s | 57 +++++++++++---
 6 files changed, 129 insertions(+), 19 deletions(-)

diff --git a/llvm/include/llvm/MC/MCAssembler.h b/llvm/include/llvm/MC/MCAssembler.h
index c6fa48128d1891..a68eb49fda2825 100644
--- a/llvm/include/llvm/MC/MCAssembler.h
+++ b/llvm/include/llvm/MC/MCAssembler.h
@@ -218,6 +218,7 @@ class MCAssembler {
   const_iterator begin() const { return Sections.begin(); }
   const_iterator end() const { return Sections.end(); }
 
+  SmallVectorImpl<const MCSymbol *> &getSymbols() { return Symbols; }
   iterator_range<pointee_iterator<
       typename SmallVector<const MCSymbol *, 0>::const_iterator>>
   symbols() const {
diff --git a/llvm/include/llvm/MC/MCTargetOptions.h b/llvm/include/llvm/MC/MCTargetOptions.h
index 899299fd15246a..a5371b3387a13d 100644
--- a/llvm/include/llvm/MC/MCTargetOptions.h
+++ b/llvm/include/llvm/MC/MCTargetOptions.h
@@ -64,6 +64,8 @@ class MCTargetOptions {
   // Use CREL relocation format for ELF.
   bool Crel = false;
 
+  bool ImplicitMapSyms = false;
+
   // If true, prefer R_X86_64_[REX_]GOTPCRELX to R_X86_64_GOTPCREL on x86-64
   // ELF.
   bool X86RelaxRelocations = true;
diff --git a/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h b/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h
index 9d592446f3ba77..5e82bc53f3b5ed 100644
--- a/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h
+++ b/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h
@@ -53,6 +53,8 @@ bool getSaveTempLabels();
 
 bool getCrel();
 
+bool getImplicitMapSyms();
+
 bool getX86RelaxRelocations();
 
 bool getX86Sse2Avx();
diff --git a/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp b/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp
index 813b1194b47cbf..1a4f7e93eeb74a 100644
--- a/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp
+++ b/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp
@@ -48,6 +48,7 @@ MCOPT(bool, NoDeprecatedWarn)
 MCOPT(bool, NoTypeCheck)
 MCOPT(bool, SaveTempLabels)
 MCOPT(bool, Crel)
+MCOPT(bool, ImplicitMapSyms)
 MCOPT(bool, X86RelaxRelocations)
 MCOPT(bool, X86Sse2Avx)
 MCOPT(std::string, ABIName)
@@ -134,6 +135,14 @@ llvm::mc::RegisterMCTargetOptionsFlags::RegisterMCTargetOptionsFlags() {
                             cl::desc("Use CREL relocation format for ELF"));
   MCBINDOPT(Crel);
 
+  static cl::opt<bool> ImplicitMapSyms(
+      "implicit-mapsyms",
+      cl::desc("Allow mapping symbol at section beginning to be implicit, "
+               "lowering number of mapping symbols at the expense of some "
+               "portability. Recommended for projects that can build all their "
+               "object files using this option"));
+  MCBINDOPT(ImplicitMapSyms);
+
   static cl::opt<bool> X86RelaxRelocations(
       "x86-relax-relocations",
       cl::desc(
@@ -174,6 +183,7 @@ MCTargetOptions llvm::mc::InitMCTargetOptionsFromFlags() {
   Options.MCNoTypeCheck = getNoTypeCheck();
   Options.MCSaveTempLabels = getSaveTempLabels();
   Options.Crel = getCrel();
+  Options.ImplicitMapSyms = getImplicitMapSyms();
   Options.X86RelaxRelocations = getX86RelaxRelocations();
   Options.X86Sse2Avx = getX86Sse2Avx();
   Options.EmitDwarfUnwind = getEmitDwarfUnwind();
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index ed670bce594ec6..667844f51f079e 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -24,14 +24,15 @@
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCELFStreamer.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCObjectWriter.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCTargetOptions.h"
 #include "llvm/MC/MCWinCOFFStreamer.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/FormattedStream.h"
@@ -176,19 +177,29 @@ void AArch64TargetAsmStreamer::emitInst(uint32_t Inst) {
 /// by MachO. Beware!
 class AArch64ELFStreamer : public MCELFStreamer {
 public:
+  friend AArch64TargetELFStreamer;
   AArch64ELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
                      std::unique_ptr<MCObjectWriter> OW,
                      std::unique_ptr<MCCodeEmitter> Emitter)
       : MCELFStreamer(Context, std::move(TAB), std::move(OW),
                       std::move(Emitter)),
-        LastEMS(EMS_None) {}
+        LastEMS(EMS_None) {
+    auto *TO = getContext().getTargetOptions();
+    ImplicitMapSyms = TO && TO->ImplicitMapSyms;
+  }
 
   void changeSection(MCSection *Section, uint32_t Subsection = 0) override {
-    // We have to keep track of the mapping symbol state of any sections we
-    // use. Each one should start off as EMS_None, which is provided as the
-    // default constructor by DenseMap::lookup.
+    // Save the mapping symbol state for potential reuse when revisiting the
+    // section. When ImplicitMapSyms is true, the initial state is
+    // EMS_A64 for text sections and EMS_Data for the others.
     LastMappingSymbols[getCurrentSection().first] = LastEMS;
-    LastEMS = LastMappingSymbols.lookup(Section);
+    auto It = LastMappingSymbols.find(Section);
+    if (It != LastMappingSymbols.end())
+      LastEMS = It->second;
+    else if (ImplicitMapSyms)
+      LastEMS = Section->isText() ? EMS_A64 : EMS_Data;
+    else
+      LastEMS = EMS_None;
 
     MCELFStreamer::changeSection(Section, Subsection);
   }
@@ -269,15 +280,15 @@ class AArch64ELFStreamer : public MCELFStreamer {
     LastEMS = EMS_A64;
   }
 
-  void emitMappingSymbol(StringRef Name) {
+  MCSymbol *emitMappingSymbol(StringRef Name) {
     auto *Symbol = cast<MCSymbolELF>(getContext().createLocalSymbol(Name));
     emitLabel(Symbol);
-    Symbol->setType(ELF::STT_NOTYPE);
-    Symbol->setBinding(ELF::STB_LOCAL);
+    return Symbol;
   }
 
   DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols;
   ElfMappingSymbol LastEMS;
+  bool ImplicitMapSyms;
 };
 } // end anonymous namespace
 
@@ -299,6 +310,53 @@ void AArch64TargetELFStreamer::finish() {
   AArch64ELFStreamer &S = getStreamer();
   MCContext &Ctx = S.getContext();
   auto &Asm = S.getAssembler();
+
+  // If ImplicitMapSyms is specified, ensure that text sections end with
+  // the A64 state while non-text sections end with the data state. When
+  // sections are combined by the linker, the subsequent section will start with
+  // the right state. The ending mapping symbol is added right after the last
+  // symbol relative to the section. When a dumb linker combines (.text.0; .word
+  // 0) and (.text.1; .word 0), the ending $x of .text.0 precedes the $d of
+  // .text.1, even if they have the same address.
+  if (S.ImplicitMapSyms) {
+    auto &Syms = Asm.getSymbols();
+    const size_t NumSyms = Syms.size();
+    DenseMap<MCSection *, MCSymbol *> EndMappingSym;
+    for (MCSection &Sec : Asm) {
+      S.switchSection(&Sec);
+      if (S.LastEMS == (Sec.isText() ? AArch64ELFStreamer::EMS_Data
+                                     : AArch64ELFStreamer::EMS_A64))
+        EndMappingSym.try_emplace(
+            &Sec, S.emitMappingSymbol(Sec.isText() ? "$x" : "$d"));
+    }
+    if (Syms.size() != NumSyms) {
+      SmallVector<const MCSymbol *, 0> NewSyms;
+      DenseMap<MCSection *, size_t> Cnt;
+      Syms.truncate(NumSyms);
+      for (const MCSymbol *Sym : Syms)
+        if (Sym->isInSection())
+          ++Cnt[&Sym->getSection()];
+      SmallVector<size_t, 0> Idx;
+      for (auto [I, Sym] : llvm::enumerate(Syms)) {
+        NewSyms.push_back(Sym);
+        MCSection *Sec = Sym->isInSection() ? &Sym->getSection() : nullptr;
+        if (!Sec || --Cnt[Sec])
+          continue;
+        // `Sym` is the last symbol relative to `Sec`. Add the ending mapping
+        // symbol, if needed, after `Sym`.
+        if (auto *MapSym = EndMappingSym.lookup(Sec)) {
+          NewSyms.push_back(MapSym);
+          Idx.push_back(I);
+        }
+      }
+      Syms = std::move(NewSyms);
+      // F.second holds the number of symbols added before the FILE symbol.
+      // Take into account the inserted mapping symbols.
+      for (auto &F : S.getWriter().getFileNames())
+        F.second += llvm::lower_bound(Idx, F.second) - Idx.begin();
+    }
+  }
+
   MCSectionELF *MemtagSec = nullptr;
   for (const MCSymbol &Symbol : Asm.symbols()) {
     const auto &Sym = cast<MCSymbolELF>(Symbol);
diff --git a/llvm/test/MC/AArch64/mapping-across-sections.s b/llvm/test/MC/AArch64/mapping-across-sections.s
index f453c86d45fb62..e688c770cc960d 100644
--- a/llvm/test/MC/AArch64/mapping-across-sections.s
+++ b/llvm/test/MC/AArch64/mapping-across-sections.s
@@ -1,5 +1,10 @@
 // RUN: llvm-mc -triple=aarch64 -filetype=obj %s | llvm-objdump -t - | FileCheck %s --match-full-lines
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -implicit-mapsyms %s | llvm-objdump -t - | FileCheck %s --check-prefix=CHECK1 --match-full-lines
 
+/// The test covers many state transitions. Let's use the first state and the last state to describe a section.
+/// .text goes through cd -> dd -> cc -> dd.
+/// .data goes through dd -> dc -> cd.
+.file "0.s"
 .section .text1,"ax"
 add w0, w0, w0
 
@@ -12,29 +17,61 @@ add w0, w0, w0
 .popsection
 
 .text
-add w1, w1, w1
+.word 42
 
 .section .text1,"ax"
 add w1, w1, w1
 
+.text
+add w1, w1, w1
+
+.section .data,"aw"
+.word 42
+add w0, w0, w0
+
 .text
 .word 42
 
+## .rodata and subsequent symbols should be after the FILE symbol of "1.s".
+.file "1.s"
 .section .rodata,"a"
 .word 42
 add w0, w0, w0
 
+.section .data,"aw"
+add w0, w0, w0
+.word 42
+
+.text
+
 .ident "clang"
 .section ".note.GNU-stack","",@progbits
 
 // CHECK:      SYMBOL TABLE:
-// CHECK-NEXT: 0000000000000000 l       .text1 0000000000000000 $x
-// CHECK-NEXT: 0000000000000000 l       .text  0000000000000000 $x
-// CHECK-NEXT: 0000000000000004 l       .text  0000000000000000 $d
-// CHECK-NEXT: 0000000000000000 l       .data  0000000000000000 $d
-// CHECK-NEXT: 0000000000000008 l       .text  0000000000000000 $x
-// CHECK-NEXT: 000000000000000c l       .text  0000000000000000 $d
-// CHECK-NEXT: 0000000000000000 l       .rodata        0000000000000000 $d
-// CHECK-NEXT: 0000000000000004 l       .rodata        0000000000000000 $x
-// CHECK-NEXT: 0000000000000000 l       .comment       0000000000000000 $d
+// CHECK-NEXT: 0000000000000000 l    df *ABS*	0000000000000000 0.s
+// CHECK-NEXT: 0000000000000000 l       .text1	0000000000000000 $x
+// CHECK-NEXT: 0000000000000000 l       .text	0000000000000000 $x
+// CHECK-NEXT: 0000000000000004 l       .text	0000000000000000 $d
+// CHECK-NEXT: 0000000000000000 l       .data	0000000000000000 $d
+// CHECK-NEXT: 000000000000000c l       .text	0000000000000000 $x
+// CHECK-NEXT: 0000000000000008 l       .data	0000000000000000 $x
+// CHECK-NEXT: 0000000000000010 l       .text	0000000000000000 $d
+// CHECK-NEXT: 0000000000000000 l    df *ABS*	0000000000000000 1.s
+// CHECK-NEXT: 0000000000000000 l       .rodata	0000000000000000 $d
+// CHECK-NEXT: 0000000000000004 l       .rodata	0000000000000000 $x
+// CHECK-NEXT: 0000000000000010 l       .data	0000000000000000 $d
+// CHECK-NEXT: 0000000000000000 l       .comment	0000000000000000 $d
 // CHECK-NOT:  {{.}}
+
+// CHECK1:      SYMBOL TABLE:
+// CHECK1-NEXT: 0000000000000000 l    df *ABS*	0000000000000000 0.s
+// CHECK1-NEXT: 0000000000000004 l       .text	0000000000000000 $d
+// CHECK1-NEXT: 000000000000000c l       .text	0000000000000000 $x
+// CHECK1-NEXT: 0000000000000008 l       .data	0000000000000000 $x
+// CHECK1-NEXT: 0000000000000010 l       .text	0000000000000000 $d
+// CHECK1-NEXT: 0000000000000014 l       .text	0000000000000000 $x
+// CHECK1-NEXT: 0000000000000000 l    df *ABS*	0000000000000000 1.s
+// CHECK1-NEXT: 0000000000000004 l       .rodata	0000000000000000 $x
+// CHECK1-NEXT: 0000000000000008 l       .rodata	0000000000000000 $d
+// CHECK1-NEXT: 0000000000000010 l       .data	0000000000000000 $d
+// CHECK1-NOT:  {{.}}



More information about the llvm-branch-commits mailing list